aboutsummaryrefslogtreecommitdiffstats
path: root/tests/test_matching.py
blob: a8f8f5b9707e0cd5c162c9e2fbf9a53bf718b716 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import collections
import logging
import warnings

import elasticsearch
import pytest
import requests
from fatcat_openapi_client import ReleaseEntity, ReleaseContrib

from fuzzycat.entities import entity_from_dict, entity_from_json
from fuzzycat.matching import anything_to_entity, FuzzyReleaseMatcher

warnings.filterwarnings(
    "ignore")  # InsecureRequestWarning: Unverified HTTPS request is being made to host ...

from fuzzycat.config import settings
from fatcat_openapi_client import ReleaseEntity
import pytest
import elasticsearch
import logging
import yaml
import glob
import json

# Module-level logger for this test module, set to emit DEBUG and above.
logger = logging.getLogger('test_matching')
logger.setLevel(logging.DEBUG)

# Search server handed to the elasticsearch client used by the tests below.
# The value is looked up in the fuzzycat settings under "FATCAT_SEARCH_URL";
# the second argument is the fallback (presumably used when the key is unset).
# ad-hoc override search server with: FUZZYCAT_FATCAT_SEARCH_URL=localhost:9200 pytest ...
FATCAT_SEARCH_URL = settings.get("FATCAT_SEARCH_URL", "https://search.fatcat.wiki:443")


def yaml_to_cases(klass, files="tests/files/fuzzy_release_matcher/*.yaml"):
    """
    Turn yaml files into a list of test cases, one case per file.

    The glob is relative to the project root (i.e. where you usually run
    `pytest` from). Every matched file is expected to hold a single mapping
    whose keys are the field names of `klass` (e.g. a namedtuple); the mapping
    is passed to `klass` as keyword arguments.

    Files are visited in sorted path order, so cases run in the same order on
    every machine (`glob.glob` itself returns paths in arbitrary order). A
    pattern that matches no file yields an empty list.

    Raises a TypeError, prefixed with the offending path, when a document
    cannot be turned into a `klass` instance (missing or unexpected keys, or a
    file that does not contain a mapping).
    """
    cases = []
    for path in sorted(glob.glob(files)):
        # Read as UTF-8 explicitly, so the result does not depend on the
        # locale of the machine running the tests.
        with open(path, encoding="utf-8") as f:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects;
            # only acceptable for trusted fixture files, never for external
            # input.
            doc = yaml.load(f, Loader=yaml.Loader)
        try:
            cases.append(klass(**doc))
        except TypeError as err:
            # Keep the exception type, but say which fixture file is broken.
            raise TypeError("{}: {}".format(path, err)) from err
    return cases


@pytest.fixture
def es_client():
    """
    Provide an elasticsearch client configured with FATCAT_SEARCH_URL as its
    only host.
    """
    hosts = [FATCAT_SEARCH_URL]
    client = elasticsearch.Elasticsearch(hosts)
    return client


def test_simple_fuzzy_release_matcher(es_client, caplog):
    """
    Run every yaml-defined case through FuzzyReleaseMatcher.match and compare
    the idents of the returned releases with the expected ones, ignoring
    order. All cases share this one test function; examples that are known to
    be broken should be documented as cases as well.
    """
    matcher = FuzzyReleaseMatcher(es=es_client)
    fields = ["about", "input", "skip_id_matching", "release_year_padding", "expected"]
    Case = collections.namedtuple("Case", fields)
    for case in yaml_to_cases(Case, "tests/files/fuzzy_release_matcher/*.yaml"):
        # One matcher is reused for all cases; its per-case options are
        # overwritten before each match call.
        matcher.release_year_padding = case.release_year_padding
        matcher.skip_id_matching = case.skip_id_matching
        release = entity_from_json(case.input, ReleaseEntity)
        found = {hit.ident for hit in matcher.match(release)}
        wanted = set(case.expected)
        assert found == wanted, "[{}] {}".format(case.about, case.input)