import collections
import logging
import warnings

import elasticsearch
import pytest
import requests
from fatcat_openapi_client import ReleaseEntity, ReleaseContrib

from fuzzycat.entities import entity_from_dict, entity_from_json
from fuzzycat.matching import anything_to_entity, FuzzyReleaseMatcher

warnings.filterwarnings(
    "ignore")  # InsecureRequestWarning: Unverified HTTPS request is being made to host ...

from fuzzycat.config import settings
from fatcat_openapi_client import ReleaseEntity
import pytest
import elasticsearch
import logging
import yaml
import glob
import json

logger = logging.getLogger('test_matching')
logger.setLevel(logging.DEBUG)

# ad-hoc override search server with: FUZZYCAT_FATCAT_SEARCH_URL=localhost:9200 pytest ...
FATCAT_SEARCH_URL = settings.get("FATCAT_SEARCH_URL", "https://search.fatcat.wiki:443")


def yaml_to_cases(klass,
                  files="tests/files/fuzzy_release_match_release_exact_title_exact_contrib/*.yaml"):
    """
    Turn yaml files into a collection of named tuple test cases. The glob is
    relative to the project root (i.e. where you usually run `pytest` from).

    Every file matched by `files` is parsed as a single YAML document and its
    top-level mapping is passed to `klass` as keyword arguments, so the keys
    in each file must match the fields `klass` accepts.

    Returns a list of `klass` instances, ordered by file path. The list is
    empty if the glob matches nothing.
    """
    cases = []
    # glob.glob() returns matches in arbitrary order; sort so that cases (and
    # therefore the first reported failure) are reproducible across machines.
    for path in sorted(glob.glob(files)):
        with open(path, encoding="utf-8") as f:
            # NOTE(review): yaml.Loader is the full loader and may construct
            # arbitrary Python objects. Acceptable for fixtures kept in this
            # repository; never point this helper at untrusted files.
            doc = yaml.load(f, Loader=yaml.Loader)
        cases.append(klass(**doc))
    return cases


@pytest.fixture
def es_client():
    """Return an elasticsearch client pointed at FATCAT_SEARCH_URL."""
    return elasticsearch.Elasticsearch([FATCAT_SEARCH_URL])


def test_simple_fuzzy_release_matcher(es_client, caplog):
    """
    Use a single test function to test the higher level match function. We
    want the result to be sensible, but should also document broken examples
    here.

    Cases are read from tests/files/fuzzy_release_matcher/*.yaml; each one
    provides `about`, `input`, `release_year_padding` and `expected`. For
    every case the set of `ident` values returned by the matcher must equal
    the set of `expected` values.
    """
    matcher = FuzzyReleaseMatcher(es=es_client)
    Case = collections.namedtuple("Case", "about input release_year_padding expected")
    cases = yaml_to_cases(Case, "tests/files/fuzzy_release_matcher/*.yaml")
    for c in cases:
        # The matcher instance is shared; only the padding varies per case.
        matcher.release_year_padding = c.release_year_padding
        entity = entity_from_json(c.input, ReleaseEntity)
        result = matcher.match(entity)
        # Compare as sets: order and duplicates in the result are not checked.
        assert {r.ident for r in result} == set(c.expected), "[{}] {}".format(c.about, c.input)