aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/import_common.py
blob: cd89f914bca02a5b136e33ffee4af0258d6e1538 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import datetime
import json
from typing import Any

import elasticsearch
import fatcat_openapi_client
import fuzzycat.matching
import pytest
from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds
from fixtures import *

from fatcat_tools.importers import EntityImporter
from fatcat_tools.transforms import entity_to_dict


@pytest.fixture(scope="function")
def entity_importer(api, mocker) -> Any:
    """
    Yield an EntityImporter backed by an Elasticsearch client whose
    low-level `perform_request` method has been replaced with a mock.
    """
    mock_es = elasticsearch.Elasticsearch("mockbackend")
    # Swap out the urllib3 connection's request method for a mock; tests may
    # patch it again when they need specific canned responses.
    mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
    importer = EntityImporter(api, es_client=mock_es)
    yield importer


def test_fuzzy_match_none(entity_importer, mocker) -> None:
    """
    "No search results" case: with the Elasticsearch request layer mocked to
    return the ES_RELEASE_EMPTY_RESP payload, fuzzy matching returns None.
    """

    release = ReleaseEntity(
        ext_ids=ReleaseExtIds(),
        title="some long title which should not match anything because it is for testing",
    )

    perform_request = mocker.patch(
        "elasticsearch.connection.Urllib3HttpConnection.perform_request"
    )
    # Queue two identical canned responses, one per mocked request
    # (presumably (status, headers, body) tuples -- confirm against the
    # elasticsearch client's connection API).
    perform_request.side_effect = [
        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)) for _ in range(2)
    ]

    assert entity_importer.match_existing_release_fuzzy(release) is None


def test_fuzzy_match_different(entity_importer, mocker) -> None:
    """
    Candidate-mocked test of fuzzy release matching.

    `match_release_fuzzy` (as imported by fatcat_tools.importers.common) is
    patched to return hand-picked candidate lists, and the outcome of
    `match_existing_release_fuzzy` is checked for each list: a "STRONG"
    match, an "EXACT" match, and two cases with no match.
    """

    query = ReleaseEntity(
        title="example title: novel work",
        contribs=[ReleaseContrib(raw_name="robin hood")],
        ext_ids=ReleaseExtIds(doi="10.1234/abcdefg"),
    )
    # Same contributor, title differing only in case and punctuation
    near_duplicate = ReleaseEntity(
        title="Example Title: Novel Work?",
        contribs=[ReleaseContrib(raw_name="robin hood")],
        ext_ids=ReleaseExtIds(),
    )
    # Different title and different contributor
    unrelated = ReleaseEntity(
        title="entirely different",
        contribs=[ReleaseContrib(raw_name="king tut")],
        ext_ids=ReleaseExtIds(),
    )

    candidate_search = mocker.patch("fatcat_tools.importers.common.match_release_fuzzy")

    # (candidates returned by the mocked search, expected (status, release)
    # pair -- or None when no match is expected)
    scenarios = [
        # near-duplicate present among unrelated candidates
        ([unrelated, near_duplicate, unrelated, near_duplicate], ("STRONG", near_duplicate)),
        # the query release itself is among the candidates
        ([near_duplicate, near_duplicate, unrelated, query], ("EXACT", query)),
        # only an unrelated candidate
        ([unrelated], None),
        # no candidates at all
        ([], None),
    ]

    for candidates, expected in scenarios:
        # One queued return value per call to the patched search function
        candidate_search.side_effect = [candidates]
        outcome = entity_importer.match_existing_release_fuzzy(query)
        if expected is None:
            assert outcome is None
        else:
            assert (outcome[0], outcome[2]) == expected