about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author bnewbold <bnewbold@archive.org> 2020-12-18 02:13:47 +0000
committer bnewbold <bnewbold@archive.org> 2020-12-18 02:13:47 +0000
commit 443243e8cccba3e779b7c56d0cdb6dcd992a3100 (patch)
tree b279887d9038daa19b72e53509658f016eaec452 /python/tests
parent 7d90a0404e3ecb44c7d0ca93b2c32a0f66b8d88a (diff)
parent 5eeb7a9d61beb8cb40fd89bd91fcd9dd820035aa (diff)
download fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.tar.gz
fatcat-443243e8cccba3e779b7c56d0cdb6dcd992a3100.zip
Merge branch 'bnewbold-doaj-fuzzy' into 'master'
DOAJ import fuzzy match filter. See merge request webgroup/fatcat!92
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/fixtures.py 7
-rw-r--r-- python/tests/import_common.py 78
-rw-r--r-- python/tests/import_doaj.py 16
3 files changed, 99 insertions, 2 deletions
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index d71ac21d..e0bb1aae 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -45,6 +45,13 @@ ES_CONTAINER_RANDOM_RESP = {
'took': 50
}
+ES_RELEASE_EMPTY_RESP = {  # canned search response body containing zero hits
+ 'timed_out': False,
+ 'hits': {'total': 0, 'hits': [], 'max_score': 0.0},
+ '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
+ 'took': 50
+}
+
@pytest.fixture
def full_app(mocker):
load_dotenv(dotenv_path="./example.env")
diff --git a/python/tests/import_common.py b/python/tests/import_common.py
new file mode 100644
index 00000000..d0db014e
--- /dev/null
+++ b/python/tests/import_common.py
@@ -0,0 +1,78 @@
+
+import json
+import datetime
+from typing import Any
+
+import pytest
+import elasticsearch
+import fatcat_openapi_client
+from fatcat_openapi_client import ReleaseEntity, ReleaseExtIds
+import fuzzycat.matching
+
+from fatcat_tools.importers import EntityImporter
+from fatcat_tools.transforms import entity_to_dict
+from fixtures import *
+
+
+@pytest.fixture(scope="function")
+def entity_importer(api, mocker) -> Any:
+    """Yield an EntityImporter built on an ES client whose perform_request is patched."""
+    mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    yield EntityImporter(api, es_client=elasticsearch.Elasticsearch("mockbackend"))
+
+def test_fuzzy_match_none(entity_importer, mocker) -> None:
+    """
+    ES-mocked check that fuzzy matching returns None when every search comes back empty
+    """
+
+    es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+    es_raw.side_effect = [  # two canned empty responses, handed out in order to successive calls
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+        (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
+    ]
+
+    release = ReleaseEntity(
+        title="some long title which should not match anything because it is for testing",
+        ext_ids=ReleaseExtIds(),
+    )
+
+    resp = entity_importer.match_existing_release_fuzzy(release)
+    assert resp is None  # identity test; `== None` can be satisfied by a custom __eq__
+
+def test_fuzzy_match_different(entity_importer, mocker) -> None:
+    """
+    fuzzycat-mocked test covering the STRONG, EXACT, and no-match (None) outcomes
+    """
+
+    r1 = ReleaseEntity(
+        title="example title: novel work",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(doi="10.1234/abcdefg"),
+    )
+    r2 = ReleaseEntity(
+        title="Example Title: Novel Work?",
+        contribs=[ReleaseContrib(raw_name="robin hood")],
+        ext_ids=ReleaseExtIds(),
+    )
+    r3 = ReleaseEntity(
+        title="entirely different",
+        contribs=[ReleaseContrib(raw_name="king tut")],
+        ext_ids=ReleaseExtIds(),
+    )
+
+    match_raw = mocker.patch('fatcat_tools.importers.common.match_release_fuzzy')
+    match_raw.side_effect = [[r3, r2, r3, r2]]  # single call returns this candidate list
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert (resp[0], resp[2]) == ("STRONG", r2)
+
+    match_raw.side_effect = [[r2, r2, r3, r1]]  # r1 itself is among the candidates
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert (resp[0], resp[2]) == ("EXACT", r1)
+
+    match_raw.side_effect = [[r3]]  # only the unrelated release is offered
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp is None
+
+    match_raw.side_effect = [[]]  # no candidates at all
+    resp = entity_importer.match_existing_release_fuzzy(r1)
+    assert resp is None
diff --git a/python/tests/import_doaj.py b/python/tests/import_doaj.py
index d69aebd7..17a23257 100644
--- a/python/tests/import_doaj.py
+++ b/python/tests/import_doaj.py
@@ -3,6 +3,7 @@ import json
import datetime
import pytest
+import elasticsearch
import fatcat_openapi_client
from fatcat_tools.importers import DoajArticleImporter, JsonLinePusher
@@ -11,14 +12,22 @@ from fixtures import *
@pytest.fixture(scope="function")
-def doaj_importer(api):
+def doaj_importer(api, mocker):
+ es_client = elasticsearch.Elasticsearch("mockbackend")
+ mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')  # replace the ES HTTP request method with a mock
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
- yield DoajArticleImporter(api, issn_file, bezerk_mode=True)
+ yield DoajArticleImporter(
+ api,
+ issn_file,
+ bezerk_mode=True,
+ es_client=es_client,
+ )
def test_doaj_importer(doaj_importer):
last_index = doaj_importer.api.get_changelog(limit=1)[0].index
with open("tests/files/example_doaj_articles.json", "r") as f:
doaj_importer.bezerk_mode = True
+ doaj_importer.do_fuzzy_match = False
counts = JsonLinePusher(doaj_importer, f).run()
assert counts["insert"] == 5
assert counts["exists"] == 0
@@ -60,6 +69,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
doaj_importer.reset()
doaj_importer.bezerk_mode = False
doaj_importer.do_updates = False
+ doaj_importer.do_fuzzy_match = False
counts = JsonLinePusher(doaj_importer, f).run()
print(counts)
assert counts["insert"] == 4
@@ -72,6 +82,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
doaj_importer.reset()
doaj_importer.bezerk_mode = False
doaj_importer.do_updates = True
+ doaj_importer.do_fuzzy_match = False
counts = JsonLinePusher(doaj_importer, f).run()
print(counts)
assert counts["insert"] == 0
@@ -84,6 +95,7 @@ def test_doaj_importer_existing_doi(doaj_importer):
doaj_importer.reset()
doaj_importer.bezerk_mode = False
doaj_importer.do_updates = True
+ doaj_importer.do_fuzzy_match = False
counts = JsonLinePusher(doaj_importer, f).run()
print(counts)
assert counts["insert"] == 0