From 8c204018ad9c1aa7f0296b2ae8d23d6f2577309c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 May 2020 18:02:52 -0700 Subject: regression test for release_stage mismatch with ingest request --- python/tests/files/example_ingest.json | 3 ++- python/tests/import_ingest.py | 45 +++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 7 deletions(-) (limited to 'python') diff --git a/python/tests/files/example_ingest.json b/python/tests/files/example_ingest.json index 18804c1b..cea67fa7 100644 --- a/python/tests/files/example_ingest.json +++ b/python/tests/files/example_ingest.json @@ -1 +1,2 @@ -{"file_meta": {"sha1hex": "00242a192acc258bdfdb151943419437f440c313", "md5hex": "f4de91152c7ab9fdc2a128f962faebff", "sha256hex": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size_bytes": 255629, "mimetype": "application/pdf"}, "request": {"project": "unit-tests", "ext_ids": {"doi": "10.123/abc"}}, "terminal": {"terminal_url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable", "terminal_dt": "20170227164644", "terminal_sha1hex": "00242a192acc258bdfdb151943419437f440c313"}, "cdx": { "datetime": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "grobid": {"status_code": 200 } } +{"file_meta": {"sha1hex": "00242a192acc258bdfdb151943419437f440c313", "md5hex": "f4de91152c7ab9fdc2a128f962faebff", "sha256hex": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size_bytes": 255629, "mimetype": "application/pdf"}, "request": {"ingest_request_source": "fatcat-changelog", "link_source": "doi", "link_source_id":"10.123/abc","ext_ids": {"doi": "10.123/abc"}}, "terminal": {"terminal_url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable", "terminal_dt": "20170227164644", "terminal_sha1hex": "00242a192acc258bdfdb151943419437f440c313"}, "cdx": { "datetime": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "grobid": {"status_code": 200 }, "hit": true, "status": "success"} +{"request":{"ingest_type":"pdf","ingest_request_source":"fatcat-changelog","base_url":"https://doi.org/10.3917/popav.748.0017","release_stage":"published","fatcat":{"release_ident":"weeqjkvsx5abze2bhithyrx6wu","work_ident":"ujatsk25yrdw5gofubw7nogzgq"},"ext_ids":{"doi":"10.3917/popav.748.0017"},"link_source":"doi","link_source_id":"10.3917/popav.748.0017"},"hit":false,"hops":["https://doi.org/10.3917/popav.748.0017"],"status":"wayback-error","error_message":"replay fetch didn't return X-Archive-Src in headers"} diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 5089d99e..02486de6 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -2,7 +2,7 @@ import json import pytest from fatcat_tools.importers import IngestFileResultImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") @@ -14,15 +14,14 @@ def test_ingest_importer_basic(ingest_importer): with open('tests/files/example_ingest.json', 'r') as f: JsonLinePusher(ingest_importer, f).run() -@pytest.mark.skip("tests not flushed out yet") def test_ingest_importer(ingest_importer): last_index = ingest_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_ingest.json', 'r') as f: ingest_importer.bezerk_mode = True counts = JsonLinePusher(ingest_importer, f).run() - assert counts['insert'] == 2 + assert counts['insert'] == 1 assert counts['exists'] == 0 - assert counts['skip'] == 11 + assert counts['skip'] == 1 # fetch most recent editgroup change = ingest_importer.api.get_changelog_entry(index=last_index+1) @@ -38,8 +37,42 @@ def test_ingest_importer(ingest_importer): ingest_importer.bezerk_mode = False counts = JsonLinePusher(ingest_importer, f).run() assert counts['insert'] == 0 - assert counts['exists'] == 2 - assert counts['skip'] == 11 + assert counts['exists'] == 1 + assert counts['skip'] == 1 + +def test_ingest_importer_stage(ingest_importer, api): + """ + Tests that ingest importer correctly handles release stage matching + """ + test_table = [ + dict(request_stage=None, release_stage=None, status="insert"), + dict(request_stage="published", release_stage=None, status="insert"), + dict(request_stage=None, release_stage="draft", status="insert"), + dict(request_stage="published", release_stage="published", status="insert"), + dict(request_stage="draft", release_stage="published", status="skip-release-stage"), + dict(request_stage="published", release_stage="draft", status="skip-release-stage"), + ] + ingest_importer.bezerk_mode = True + with open('tests/files/example_ingest.json', 'r') as f: + raw = json.loads(f.readline()) + for row in test_table: + print(row) + + # set dummy record stage + eg = quick_eg(api) + r1 = api.lookup_release(doi="10.123/abc") + r1.release_stage = row['release_stage'] + c1 = api.update_release(eg.editgroup_id, r1.ident, r1) + api.accept_editgroup(eg.editgroup_id) + + # set ingest request stage + raw['request']['release_stage'] = row['request_stage'] + ingest_importer.reset() + ingest_importer.push_record(raw) + counts = ingest_importer.finish() + print(counts) + assert counts["total"] == 1 + assert counts[row['status']] == 1 def test_ingest_dict_parse(ingest_importer): with open('tests/files/example_ingest.json', 'r') as f: -- cgit v1.2.3 From 9bbc8f4ceec54563d71c68564b35ceb14ac60840 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 May 2020 18:03:17 -0700 Subject: ingest importer: check that stage is consistent with release --- python/fatcat_tools/importers/ingest.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'python') diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 6cf1604b..2b630e67 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -109,6 +109,11 @@ class IngestFileResultImporter(EntityImporter): self.counts['warn-extid-invalid'] += 1 continue raise err + # verify release_stage + if request.get('release_stage') and release.release_stage: + if request['release_stage'] != release.release_stage: + self.counts['skip-release-stage'] += 1 + return None release_ident = release.ident break if self.use_glutton_match and not release_ident and row.get('grobid'): -- cgit v1.2.3