diff options
author | bnewbold <bnewbold@archive.org> | 2020-12-18 02:13:47 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2020-12-18 02:13:47 +0000 |
commit | 443243e8cccba3e779b7c56d0cdb6dcd992a3100 (patch) | |
tree | b279887d9038daa19b72e53509658f016eaec452 /python/tests | |
parent | 7d90a0404e3ecb44c7d0ca93b2c32a0f66b8d88a (diff) | |
parent | 5eeb7a9d61beb8cb40fd89bd91fcd9dd820035aa (diff) | |
download | fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.tar.gz fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.zip |
Merge branch 'bnewbold-doaj-fuzzy' into 'master'
DOAJ import fuzzy match filter
See merge request webgroup/fatcat!92
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/fixtures.py | 7 | ||||
-rw-r--r-- | python/tests/import_common.py | 78 | ||||
-rw-r--r-- | python/tests/import_doaj.py | 16 |
3 files changed, 99 insertions, 2 deletions
```diff
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index d71ac21d..e0bb1aae 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -45,6 +45,13 @@ ES_CONTAINER_RANDOM_RESP = {
     'took': 50
 }
 
+ES_RELEASE_EMPTY_RESP = {
+    'timed_out': False,
+    'hits': {'total': 0, 'hits': [], 'max_score': 0.0},
+    '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
+    'took': 50
+}
+
 @pytest.fixture
 def full_app(mocker):
     load_dotenv(dotenv_path="./example.env")
diff --git a/python/tests/import_common.py b/python/tests/import_common.py
new file mode 100644
index 00000000..d0db014e
--- /dev/null
+++ b/python/tests/import_common.py
@@ -0,0 +1,78 @@
+
+import json
+import datetime
+from typing import Any
+
+import pytest
+import elasticsearch
+import fatcat_openapi_client
+from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds
+import fuzzycat.matching
+
+from fatcat_tools.importers import EntityImporter
+from fatcat_tools.transforms import entity_to_dict
+from fixtures import *
+
+
+@pytest.fixture(scope="function")
+def entity_importer(api, mocker) -> Any:
+    es_client = elasticsearch.Elasticsearch("mockbackend")
+    mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    yield EntityImporter(api, es_client=es_client)
+
+def test_fuzzy_match_none(entity_importer, mocker) -> None:
+    """
+    Simple ES-mocked test for "no search results" case
+    """
+
+    es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    es_raw.side_effect = [
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+    ]
+
+    release = ReleaseEntity(
+        title="some long title which should not match anything because it is for testing",
+        ext_ids=ReleaseExtIds(),
+    )
+
+    resp = entity_importer.match_existing_release_fuzzy(release)
+    assert resp == None
+
+def test_fuzzy_match_different(entity_importer, mocker) -> None:
+    """
+    Simple fuzzycat-mocked test for "strong match" case
+    """
+
+    r1 = ReleaseEntity(
+        title="example title: novel work",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(doi="10.1234/abcdefg"),
+    )
+    r2 = ReleaseEntity(
+        title="Example Title: Novel Work?",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(),
+    )
+    r3 = ReleaseEntity(
+        title="entirely different",
+        contribs=[ReleaseContrib(raw_name="king tut")],
+        ext_ids=ReleaseExtIds(),
+    )
+
+    match_raw = mocker.patch('fatcat_tools.importers.common.match_release_fuzzy')
+    match_raw.side_effect = [[r3, r2, r3, r2]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert (resp[0], resp[2]) == ("STRONG", r2)
+
+    match_raw.side_effect = [[r2, r2, r3, r1]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert (resp[0], resp[2]) == ("EXACT", r1)
+
+    match_raw.side_effect = [[r3]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == None
+
+    match_raw.side_effect = [[]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == None
diff --git a/python/tests/import_doaj.py b/python/tests/import_doaj.py
index d69aebd7..17a23257 100644
--- a/python/tests/import_doaj.py
+++ b/python/tests/import_doaj.py
@@ -3,6 +3,7 @@ import json
 import datetime
 
 import pytest
+import elasticsearch
 import fatcat_openapi_client
 
 from fatcat_tools.importers import DoajArticleImporter, JsonLinePusher
@@ -11,14 +12,22 @@ from fixtures import *
 
 
 @pytest.fixture(scope="function")
-def doaj_importer(api):
+def doaj_importer(api, mocker):
+    es_client = elasticsearch.Elasticsearch("mockbackend")
+    mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
     with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
-        yield DoajArticleImporter(api, issn_file, bezerk_mode=True)
+        yield DoajArticleImporter(
+            api,
+            issn_file,
+            bezerk_mode=True,
+            es_client=es_client,
+        )
 
 def test_doaj_importer(doaj_importer):
     last_index = doaj_importer.api.get_changelog(limit=1)[0].index
     with open("tests/files/example_doaj_articles.json", "r") as f:
         doaj_importer.bezerk_mode = True
+        doaj_importer.do_fuzzy_match = False
         counts = JsonLinePusher(doaj_importer, f).run()
     assert counts["insert"] == 5
     assert counts["exists"] == 0
@@ -60,6 +69,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
         doaj_importer.reset()
         doaj_importer.bezerk_mode = False
         doaj_importer.do_updates = False
+        doaj_importer.do_fuzzy_match = False
         counts = JsonLinePusher(doaj_importer, f).run()
     print(counts)
     assert counts["insert"] == 4
@@ -72,6 +82,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
         doaj_importer.reset()
         doaj_importer.bezerk_mode = False
         doaj_importer.do_updates = True
+        doaj_importer.do_fuzzy_match = False
         counts = JsonLinePusher(doaj_importer, f).run()
     print(counts)
     assert counts["insert"] == 0
@@ -84,6 +95,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
         doaj_importer.reset()
         doaj_importer.bezerk_mode = False
         doaj_importer.do_updates = True
+        doaj_importer.do_fuzzy_match = False
         counts = JsonLinePusher(doaj_importer, f).run()
     print(counts)
     assert counts["insert"] == 0
```