diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-16 19:54:47 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-16 20:16:09 -0800 |
commit | 38328c25674fee7781a8d8601e1d47de04186f19 (patch) | |
tree | 32d834fb16bb93ad493a053a9b797d9da1e04ac5 /python/tests | |
parent | 20f27677aff762822bbd3aa944caf430c089ab4b (diff) | |
download | fatcat-38328c25674fee7781a8d8601e1d47de04186f19.tar.gz fatcat-38328c25674fee7781a8d8601e1d47de04186f19.zip |
add fuzzy matching helper to importer base class
Using fuzzycat. Add basic test coverage.
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/fixtures.py | 7 | ||||
-rw-r--r-- | python/tests/import_common.py | 78 |
2 files changed, 85 insertions, 0 deletions
```diff
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index d71ac21d..e0bb1aae 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -45,6 +45,13 @@ ES_CONTAINER_RANDOM_RESP = {
     'took': 50
 }
 
+ES_RELEASE_EMPTY_RESP = {
+    'timed_out': False,
+    'hits': {'total': 0, 'hits': [], 'max_score': 0.0},
+    '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
+    'took': 50
+}
+
 @pytest.fixture
 def full_app(mocker):
     load_dotenv(dotenv_path="./example.env")
diff --git a/python/tests/import_common.py b/python/tests/import_common.py
new file mode 100644
index 00000000..9f04ebe0
--- /dev/null
+++ b/python/tests/import_common.py
@@ -0,0 +1,78 @@
+
+import json
+import datetime
+from typing import Any
+
+import pytest
+import elasticsearch
+import fatcat_openapi_client
+from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds
+import fuzzycat.matching
+
+from fatcat_tools.importers import EntityImporter
+from fatcat_tools.transforms import entity_to_dict
+from fixtures import *
+
+
+@pytest.fixture(scope="function")
+def entity_importer(api, mocker) -> Any:
+    es_client = elasticsearch.Elasticsearch("mockbackend")
+    mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    yield EntityImporter(api, es_client=es_client)
+
+def test_fuzzy_match_none(entity_importer, mocker) -> None:
+    """
+    Simple ES-mocked test for "no search results" case
+    """
+
+    es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    es_raw.side_effect = [
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+    ]
+
+    release = ReleaseEntity(
+        title="some long title which should not match anything because it is for testing",
+        ext_ids=ReleaseExtIds(),
+    )
+
+    resp = entity_importer.match_existing_release_fuzzy(release)
+    assert resp == None
+
+def test_fuzzy_match_different(entity_importer, mocker) -> None:
+    """
+    Simple fuzzycat-mocked test for "strong match" case
+    """
+
+    r1 = ReleaseEntity(
+        title="example title: novel work",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(doi="10.1234/abcdefg"),
+    )
+    r2 = ReleaseEntity(
+        title="Example Title: Novel Work?",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(),
+    )
+    r3 = ReleaseEntity(
+        title="entirely different",
+        contribs=[ReleaseContrib(raw_name="king tut")],
+        ext_ids=ReleaseExtIds(),
+    )
+
+    match_raw = mocker.patch('fatcat_tools.importers.common.match_release_fuzzy')
+    match_raw.side_effect = [[r3, r2, r3, r2]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == ("STRONG", r2)
+
+    match_raw.side_effect = [[r2, r2, r3, r1]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == ("EXACT", r1)
+
+    match_raw.side_effect = [[r3]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == None
+
+    match_raw.side_effect = [[]]
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp == None
```