From 8b6b1447cc37fb76865fd80377c55463e59db3b9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 5 Nov 2020 17:19:37 -0800 Subject: ingest: basic checks for ingest_type --- python/tests/files/example_ingest.json | 2 +- python/tests/import_ingest.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'python/tests') diff --git a/python/tests/files/example_ingest.json b/python/tests/files/example_ingest.json index cea67fa7..a9791587 100644 --- a/python/tests/files/example_ingest.json +++ b/python/tests/files/example_ingest.json @@ -1,2 +1,2 @@ -{"file_meta": {"sha1hex": "00242a192acc258bdfdb151943419437f440c313", "md5hex": "f4de91152c7ab9fdc2a128f962faebff", "sha256hex": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size_bytes": 255629, "mimetype": "application/pdf"}, "request": {"ingest_request_source": "fatcat-changelog", "link_source": "doi", "link_source_id":"10.123/abc","ext_ids": {"doi": "10.123/abc"}}, "terminal": {"terminal_url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable", "terminal_dt": "20170227164644", "terminal_sha1hex": "00242a192acc258bdfdb151943419437f440c313"}, "cdx": { "datetime": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "grobid": {"status_code": 200 }, "hit": true, "status": "success"} +{"file_meta": {"sha1hex": "00242a192acc258bdfdb151943419437f440c313", "md5hex": "f4de91152c7ab9fdc2a128f962faebff", "sha256hex": "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", "size_bytes": 255629, "mimetype": "application/pdf"}, "request": {"ingest_type": "pdf", "ingest_request_source": "fatcat-changelog", "link_source": "doi", "link_source_id":"10.123/abc","ext_ids": {"doi": "10.123/abc"}}, "terminal": {"terminal_url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable", "terminal_dt": "20170227164644", "terminal_sha1hex": "00242a192acc258bdfdb151943419437f440c313"}, "cdx": { "datetime": "20170227164644", "url": "http://journals.plos.org/plosmedicine/article/file?id=10.1371/journal.pmed.0020124&type=printable" }, "grobid": {"status_code": 200 }, "hit": true, "status": "success"} {"request":{"ingest_type":"pdf","ingest_request_source":"fatcat-changelog","base_url":"https://doi.org/10.3917/popav.748.0017","release_stage":"published","fatcat":{"release_ident":"weeqjkvsx5abze2bhithyrx6wu","work_ident":"ujatsk25yrdw5gofubw7nogzgq"},"ext_ids":{"doi":"10.3917/popav.748.0017"},"link_source":"doi","link_source_id":"10.3917/popav.748.0017"},"hit":false,"hops":["https://doi.org/10.3917/popav.748.0017"],"status":"wayback-error","error_message":"replay fetch didn't return X-Archive-Src in headers"} diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 4a46232a..05287af4 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -94,6 +94,12 @@ def test_ingest_dict_parse(ingest_importer): def test_ingest_dict_parse_old(ingest_importer): with open('tests/files/example_ingest.old.json', 'r') as f: raw = json.loads(f.readline()) + + # ancient ingest requests had no type; skip them + f = ingest_importer.parse_record(raw) + assert f == None + raw['request']['ingest_type'] = 'pdf' + f = ingest_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" -- cgit v1.2.3