commit cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7
Author:     Bryan Newbold <bnewbold@robocracy.org>
AuthorDate: 2021-11-02 18:13:14 -0700
Commit:     Bryan Newbold <bnewbold@robocracy.org>
CommitDate: 2021-11-02 18:13:14 -0700
Tree:       5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python
Parent:     78f08280edea4ff65ca613ad30005c45cc48dea6
fmt (black): tests/
Diffstat (limited to 'python'):
 55 files changed, 1852 insertions(+), 1430 deletions(-)
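Everything below is mechanical reformatting from running the black code formatter over tests/: string literals normalized to double quotes, long call expressions exploded one argument per line with trailing commas, two blank lines restored between top-level functions, and comments given a space after the "#". As a rough illustration only (this snippet is made up, not code from this repository), black can be applied to a source string programmatically; this sketch assumes the black package is installed and uses its default mode:

    import black

    # A made-up snippet in the pre-black style seen throughout this diff:
    # single quotes, a magic trailing comma in a dict, and spaces around "=".
    src = "d = {'thing': 'thang',}\nx = api.get_thing( ident = 'abc' )\n"

    # format_str() reformats a source string without touching any files.
    print(black.format_str(src, mode=black.Mode()))

    # Expected output (roughly):
    #   d = {
    #       "thing": "thang",
    #   }
    #   x = api.get_thing(ident="abc")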
diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py index 79acaa4b..ac898fa2 100644 --- a/python/tests/api_annotations.py +++ b/python/tests/api_annotations.py @@ -1,4 +1,3 @@ - from fatcat_openapi_client import * from fixtures import * @@ -14,14 +13,13 @@ def test_annotations(api): # create an annotation! api.create_editgroup_annotation( eg.editgroup_id, - EditgroupAnnotation( - comment_markdown="some *annotation*", - extra=dict(thing="thang"))) + EditgroupAnnotation(comment_markdown="some *annotation*", extra=dict(thing="thang")), + ) # check that we can fetch it all sorts of ways a = api.get_editgroup_annotations(eg.editgroup_id) assert len(a) == 1 - assert a[0].extra['thing'] == "thang" + assert a[0].extra["thing"] == "thang" # the editor persists, so this is a hack to find a "recent" one a2 = api.get_editor_annotations(eg.editor_id, limit=100) @@ -31,4 +29,4 @@ def test_annotations(api): found = thing break assert found - assert found.extra['thing'] == "thang" + assert found.extra["thing"] == "thang" diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py index 0f6c85b1..d6fd421a 100644 --- a/python/tests/api_containers.py +++ b/python/tests/api_containers.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client import * from fixtures import * @@ -63,6 +62,7 @@ def test_container(api): c2 = api.get_container(c2.ident) assert c2.state == "deleted" + def test_container_bad_idents(api): # all the fields! @@ -81,9 +81,10 @@ def test_container_bad_idents(api): with pytest.raises(ValueError): c1.issne = "asdf-hhhh" + def test_container_examples(api): - c1 = api.get_container('aaaaaaaaaaaaaeiraaaaaaaaam') + c1 = api.get_container("aaaaaaaaaaaaaeiraaaaaaaaam") assert c1.name == "PLOS Medicine" assert c1.issnl == "1549-1277" assert c1.issne == "1549-1676" diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py index b271e2b3..fe2bd91b 100644 --- a/python/tests/api_creators.py +++ b/python/tests/api_creators.py @@ -1,4 +1,3 @@ - from fatcat_openapi_client import * from fixtures import * @@ -56,10 +55,12 @@ def test_creators(api): c2 = api.get_creator(c2.ident) assert c2.state == "deleted" + # TODO: test expansion of new creator/release pair (release get)? 
+ def test_creators_examples(api): # ident: aaaaaaaaaaaaaircaaaaaaaaam - c1 = api.lookup_creator(orcid='0000-0003-3118-6859') + c1 = api.lookup_creator(orcid="0000-0003-3118-6859") assert c1.ident == "aaaaaaaaaaaaaircaaaaaaaaam" diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py index 9f870fb1..f77f2202 100644 --- a/python/tests/api_editgroups.py +++ b/python/tests/api_editgroups.py @@ -1,4 +1,3 @@ - import datetime import pytest @@ -45,7 +44,7 @@ def test_editgroup_submit(api): reviewable = api.get_editgroups_reviewable(limit=100) assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] eg3 = api.get_editgroup(eg.editgroup_id) - #print(eg3) + # print(eg3) assert eg3.submitted assert eg3.changelog_index @@ -63,23 +62,29 @@ def test_editgroup_ordering(api): api.update_editgroup( eg1.editgroup_id, Editgroup(editgroup_id=eg1.editgroup_id, description="FAIL"), - submit=True) + submit=True, + ) api.update_editgroup( eg2.editgroup_id, Editgroup(editgroup_id=eg2.editgroup_id, description="FAIL"), - submit=True) + submit=True, + ) r1 = api.get_editgroups_reviewable() - #print(r1) + # print(r1) assert not r1[0].description assert not r1[1].description assert r1[0].submitted >= r1[1].submitted # should be no editgroups "in the future" (since now + 1sec) - r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat()+"Z") + r1 = api.get_editgroups_reviewable( + since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat() + "Z" + ) assert not r1 - r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat()+"Z") + r1 = api.get_editgroups_reviewable( + since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat() + "Z" + ) assert r1[0].submitted <= r1[1].submitted @@ -88,9 +93,9 @@ def test_editgroup_auto_batch(api): c1 = CreatorEntity(display_name="test auto_batch") c2 = CreatorEntity(display_name="test another auto_batch") - eg1 = api.create_creator_auto_batch(CreatorAutoBatch( - editgroup=Editgroup(), - entity_list=[c1, c2])) + eg1 = api.create_creator_auto_batch( + CreatorAutoBatch(editgroup=Editgroup(), entity_list=[c1, c2]) + ) assert eg1.changelog_index assert len(eg1.edits.creators) == 2 @@ -103,11 +108,11 @@ def test_batch_params(api): desc = "test description" extra = dict(a=75, q="thing") - eg1 = api.create_creator_auto_batch(CreatorAutoBatch( - editgroup=Editgroup( - description=desc, - extra=extra), - entity_list=[c1, c2])) + eg1 = api.create_creator_auto_batch( + CreatorAutoBatch( + editgroup=Editgroup(description=desc, extra=extra), entity_list=[c1, c2] + ) + ) assert eg1.description == desc assert eg1.extra == extra diff --git a/python/tests/api_editor.py b/python/tests/api_editor.py index 0c680a2f..ef70b42c 100644 --- a/python/tests/api_editor.py +++ b/python/tests/api_editor.py @@ -1,4 +1,3 @@ - import fatcat_openapi_client import pytest from fixtures import api @@ -18,11 +17,13 @@ def test_editor_update(api): check = api.get_editor(editor_id) assert check == orig + def test_editor_get(api): editor_id = api.editor_id api.get_editor(editor_id) + def test_editor_lookup(api): editor_id = api.editor_id diff --git a/python/tests/api_entity_editing.py b/python/tests/api_entity_editing.py index 5f22e9f2..4e5b07d7 100644 --- a/python/tests/api_entity_editing.py +++ b/python/tests/api_entity_editing.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client import * from fixtures import * @@ -53,7 +52,7 @@ def 
test_edit_after_accept(api): except fatcat_openapi_client.rest.ApiException as e: assert 400 <= e.status < 500 # TODO: need better message - #assert "accepted" in e.body + # assert "accepted" in e.body # cleanup eg = quick_eg(api) @@ -142,8 +141,8 @@ def test_wip_revision(api): def test_edit_get_all(api_dummy_entities): ade = api_dummy_entities - api = ade['api'] - eg = api.get_editgroup(ade['editgroup'].editgroup_id) + api = ade["api"] + eg = api.get_editgroup(ade["editgroup"].editgroup_id) assert api.get_creator_edit(eg.edits.creators[0].edit_id) assert api.get_container_edit(eg.edits.containers[0].edit_id) @@ -153,11 +152,12 @@ def test_edit_get_all(api_dummy_entities): assert api.get_release_edit(eg.edits.releases[0].edit_id) assert api.get_work_edit(eg.edits.works[0].edit_id) + def test_edit_delete_all(api_dummy_entities): ade = api_dummy_entities - api = ade['api'] - eg = api.get_editgroup(ade['editgroup'].editgroup_id) + api = ade["api"] + eg = api.get_editgroup(ade["editgroup"].editgroup_id) api.delete_creator_edit(eg.editgroup_id, eg.edits.creators[0].edit_id) api.delete_container_edit(eg.editgroup_id, eg.edits.containers[0].edit_id) @@ -168,7 +168,7 @@ def test_edit_delete_all(api_dummy_entities): api.delete_work_edit(eg.editgroup_id, eg.edits.works[0].edit_id) # verify deletion - eg = api.get_editgroup(ade['editgroup'].editgroup_id) + eg = api.get_editgroup(ade["editgroup"].editgroup_id) assert len(eg.edits.creators) == 0 assert len(eg.edits.containers) == 0 assert len(eg.edits.files) == 0 diff --git a/python/tests/api_entity_state.py b/python/tests/api_entity_state.py index 58e1d42b..954ea29a 100644 --- a/python/tests/api_entity_state.py +++ b/python/tests/api_entity_state.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client import * from fatcat_openapi_client.rest import ApiException @@ -17,8 +16,8 @@ def test_redirect_entity(api): offset = 0 while True: offset += 1 - o1 = '0000-0000-1111-%04d' % offset - o2 = '0000-0000-2222-%04d' % offset + o1 = "0000-0000-1111-%04d" % offset + o2 = "0000-0000-2222-%04d" % offset try: api.lookup_creator(orcid=o1) continue @@ -55,7 +54,9 @@ def test_redirect_entity(api): assert redirs == [] api.accept_editgroup(eg.editgroup_id) redirs = api.get_creator_redirects(c1.ident) - assert redirs == [c2.ident, ] + assert redirs == [ + c2.ident, + ] # get both by ident res = api.get_creator(c1.ident) @@ -95,7 +96,9 @@ def test_redirect_entity(api): assert res.display_name is None assert res.revision is None redirs = api.get_creator_redirects(c1.ident) - assert redirs == [c2.ident, ] + assert redirs == [ + c2.ident, + ] # undelete first; check that second is a redirect eg = quick_eg(api) @@ -106,7 +109,9 @@ def test_redirect_entity(api): assert res.state == "redirect" assert res.display_name == "test one one" redirs = api.get_creator_redirects(c1.ident) - assert redirs == [c2.ident, ] + assert redirs == [ + c2.ident, + ] # split second entity back out assert c2.revision @@ -116,7 +121,9 @@ def test_redirect_entity(api): # prev_revision should be none after an un-redirect assert update_edit.prev_revision is None redirs = api.get_creator_redirects(c1.ident) - assert redirs == [c2.ident, ] + assert redirs == [ + c2.ident, + ] api.accept_editgroup(eg.editgroup_id) res = api.get_creator(c2.ident) assert res.state == "active" @@ -138,7 +145,7 @@ def test_delete_entity(api): offset = 0 while True: offset += 1 - o1 = '0000-0000-1111-%04d' % offset + o1 = "0000-0000-1111-%04d" % offset try: api.lookup_creator(orcid=o1) continue @@ -177,10 +184,10 @@ def 
test_delete_entity(api): try: # can't re-delete an entity api.delete_creator(eg.editgroup_id, c1.ident) - #api.accept_editgroup(eg.editgroup_id) + # api.accept_editgroup(eg.editgroup_id) assert False except fatcat_openapi_client.rest.ApiException as e: - assert 400 <= e.status < 500 # error is 4xx + assert 400 <= e.status < 500 # error is 4xx # undelete eg = quick_eg(api) @@ -204,9 +211,9 @@ def test_recursive_redirects_entity(api): offset = 0 while True: offset += 1 - o1 = '0000-0000-1111-%04d' % offset - o2 = '0000-0000-2222-%04d' % offset - o3 = '0000-0000-3333-%04d' % offset + o1 = "0000-0000-1111-%04d" % offset + o2 = "0000-0000-2222-%04d" % offset + o3 = "0000-0000-3333-%04d" % offset try: api.lookup_creator(orcid=o1) continue @@ -449,6 +456,7 @@ def test_required_entity_fields(api): assert 400 <= e.status < 500 assert "title" in e.body + def test_revert_current_status(api): c1 = CreatorEntity(display_name="test updates") diff --git a/python/tests/api_files.py b/python/tests/api_files.py index 65eda993..443fe63f 100644 --- a/python/tests/api_files.py +++ b/python/tests/api_files.py @@ -1,4 +1,3 @@ - from fatcat_openapi_client import * from fixtures import * @@ -15,7 +14,10 @@ def test_file(api): sha256="f1f4f18a904e76818863ccbc6141fce92b0dcb47b0d6041aec98bc6806e393c3", mimetype="application/pdf", urls=[ - FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"), + FileUrl( + url="https://web.archive.org/web/12345542/something.com/blah.pdf", + rel="webarchive", + ), ], release_ids=[], extra=dict(a=2, b=5), @@ -53,12 +55,15 @@ def test_file(api): f2 = api.get_file(f2.ident) assert f2.state == "deleted" + def test_file_examples(api): - f1 = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362') + f1 = api.lookup_file( + sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" + ) assert f1.releases is None - f1 = api.get_file('aaaaaaaaaaaaamztaaaaaaaaam', expand="releases") + f1 = api.get_file("aaaaaaaaaaaaamztaaaaaaaaam", expand="releases") assert f1.sha256 == "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" assert f1.releases[0].ident assert f1.releases[0].abstracts is None diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py index f351dc9f..1ec0df17 100644 --- a/python/tests/api_filesets.py +++ b/python/tests/api_filesets.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client import * from fixtures import * @@ -11,7 +10,7 @@ def test_fileset(api): r1edit = api.create_release(eg.editgroup_id, r1) fs1 = FilesetEntity( - manifest = [ + manifest=[ FilesetFile( path="data/thing.tar.gz", size=54321, @@ -30,11 +29,11 @@ def test_fileset(api): extra={"x": 1, "y": "q"}, ), ], - urls = [ + urls=[ FilesetUrl(url="https://archive.org/download/fileset-123/", rel="repository"), FilesetUrl(url="https://humble-host.com/~user123/dataset/", rel="web"), ], - release_ids = [r1edit.ident], + release_ids=[r1edit.ident], extra=dict(t=4, u=9), edit_extra=dict(test_key="filesets rule"), ) @@ -59,7 +58,9 @@ def test_fileset(api): # expansion r1 = api.get_release(r1edit.ident, expand="filesets") # XXX: manifest return order is *NOT* currently stable - assert (r1.filesets[0].manifest == fs1.manifest) or (r1.filesets[0].manifest == list(reversed(fs1.manifest))) + assert (r1.filesets[0].manifest == fs1.manifest) or ( + r1.filesets[0].manifest == list(reversed(fs1.manifest)) + ) # get redirects (none) assert api.get_fileset_redirects(fs2.ident) == [] @@ -71,28 +72,30 @@ def 
test_fileset(api): fs2 = api.get_fileset(fs2.ident) assert fs2.state == "deleted" + def test_fileset_examples(api): - fs3 = api.get_fileset('aaaaaaaaaaaaaztgaaaaaaaaam') + fs3 = api.get_fileset("aaaaaaaaaaaaaztgaaaaaaaaam") assert fs3.releases is None - fs3 = api.get_fileset('aaaaaaaaaaaaaztgaaaaaaaaam', expand="releases") + fs3 = api.get_fileset("aaaaaaaaaaaaaztgaaaaaaaaam", expand="releases") - assert fs3.urls[0].url == 'http://other-personal-blog.name/dataset/' - assert fs3.urls[1].rel == 'archive' - assert fs3.manifest[1].md5 == 'f4de91152c7ab9fdc2a128f962faebff' - assert fs3.manifest[1].mimetype == 'application/gzip' + assert fs3.urls[0].url == "http://other-personal-blog.name/dataset/" + assert fs3.urls[1].rel == "archive" + assert fs3.manifest[1].md5 == "f4de91152c7ab9fdc2a128f962faebff" + assert fs3.manifest[1].mimetype == "application/gzip" assert fs3.manifest[1].extra is not None assert fs3.releases[0].ident assert fs3.releases[0].abstracts is None assert fs3.releases[0].refs is None + def test_bad_fileset(api): eg = quick_eg(api) bad_list = [ # good (for testing test itself) - #FilesetEntity(manifest=[FilesetFile(path="123.jpg", size=1234)]), - #FilesetEntity(urls=[FilesetUrl(url="thing", rel="blah")]), + # FilesetEntity(manifest=[FilesetFile(path="123.jpg", size=1234)]), + # FilesetEntity(urls=[FilesetUrl(url="thing", rel="blah")]), FilesetEntity(manifest=[FilesetFile(path="123.jpg", size="big")]), FilesetEntity(release_ids=["asdf"]), ] diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py index 4c9ac9a6..7bb7023b 100644 --- a/python/tests/api_misc.py +++ b/python/tests/api_misc.py @@ -1,35 +1,42 @@ - from fatcat_openapi_client import * from fixtures import * def test_lookup_hide_extend(api): - r = api.lookup_release(doi='10.1371/journal.pmed.0020124') + r = api.lookup_release(doi="10.1371/journal.pmed.0020124") assert len(r.refs) >= 2 assert r.files is None assert r.container is None assert len(r.container_id) > 10 assert r.abstracts == [] - r = api.lookup_release(doi='10.1371/journal.pmed.0020124', expand='files', hide='refs,abstracts') + r = api.lookup_release( + doi="10.1371/journal.pmed.0020124", expand="files", hide="refs,abstracts" + ) assert r.refs is None assert len(r.files[0].sha1) == 40 assert r.container is None assert r.abstracts is None - r = api.lookup_release(doi='10.1371/journal.pmed.0020124', expand='container,abstracts') + r = api.lookup_release(doi="10.1371/journal.pmed.0020124", expand="container,abstracts") assert len(r.refs) >= 2 assert r.files is None assert r.container.issnl assert r.abstracts == [] - f = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362') + f = api.lookup_file( + sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" + ) assert f.releases is None - f = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362', expand='releases') + f = api.lookup_file( + sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362", + expand="releases", + ) assert f.releases[0].ident == f.release_ids[0] + def test_unexpected_body(api): eg = quick_eg(api) diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py index a823d4cd..39c09270 100644 --- a/python/tests/api_releases.py +++ b/python/tests/api_releases.py @@ -1,4 +1,3 @@ - import datetime import pytest @@ -44,26 +43,16 @@ def test_release(api): language="en", license_slug="CC-0", contribs=[ - ReleaseContrib( - given_name="Paul", - surname="Otlet"), - ReleaseContrib( - 
raw_name="Cindy Sherman", - given_name="Cindy", - surname="Sherman"), - ReleaseContrib( - raw_name="Andy Warhol"), + ReleaseContrib(given_name="Paul", surname="Otlet"), + ReleaseContrib(raw_name="Cindy Sherman", given_name="Cindy", surname="Sherman"), + ReleaseContrib(raw_name="Andy Warhol"), ], refs=[], abstracts=[ + ReleaseAbstract(content="this is some abstract", mimetype="text/plain", lang="en"), ReleaseAbstract( - content="this is some abstract", - mimetype="text/plain", - lang="en"), - ReleaseAbstract( - content="this is some other abstract", - mimetype="text/plain", - lang="de"), + content="this is some other abstract", mimetype="text/plain", lang="de" + ), ], extra=dict(a=1, b=2), edit_extra=dict(test_key="releases rule"), @@ -138,33 +127,34 @@ def test_release(api): r2 = api.get_release(r2.ident) assert r2.state == "deleted" + def test_release_examples(api): - api.lookup_release(pmid='54321') - api.lookup_release(doi='10.123/abc') - api.lookup_release(isbn13='978-3-16-148410-0') - api.lookup_release(arxiv='1905.03769v1') - api.lookup_release(jstor='1819117828') - api.lookup_release(ark='ark:/13030/m53r5pzm') - api.lookup_release(mag='992489213') - api.lookup_release(hdl='20.500.23456/ABC/DUMMY') - api.lookup_release(hdl='20.500.23456/abc/dummy') + api.lookup_release(pmid="54321") + api.lookup_release(doi="10.123/abc") + api.lookup_release(isbn13="978-3-16-148410-0") + api.lookup_release(arxiv="1905.03769v1") + api.lookup_release(jstor="1819117828") + api.lookup_release(ark="ark:/13030/m53r5pzm") + api.lookup_release(mag="992489213") + api.lookup_release(hdl="20.500.23456/ABC/DUMMY") + api.lookup_release(hdl="20.500.23456/abc/dummy") # failed lookup exception type try: - api.lookup_release(pmid='5432100') + api.lookup_release(pmid="5432100") except fatcat_openapi_client.rest.ApiException as ae: assert ae.status == 404 assert "DatabaseRowNotFound" in ae.body # failed lookup formatting try: - api.lookup_release(doi='blah') + api.lookup_release(doi="blah") except fatcat_openapi_client.rest.ApiException as ae: assert ae.status == 400 assert "MalformedExternalId" in ae.body - r1 = api.get_release('aaaaaaaaaaaaarceaaaaaaaaai') + r1 = api.get_release("aaaaaaaaaaaaarceaaaaaaaaai") assert r1.title.startswith("A bigger example") assert len(r1.refs) == 5 assert r1.contribs[14].role == "editor" @@ -174,6 +164,7 @@ def test_release_examples(api): api.get_release_filesets(r1.ident) api.get_release_webcaptures(r1.ident) + def test_empty_fields(api): eg = quick_eg(api) @@ -181,33 +172,43 @@ def test_empty_fields(api): r1 = ReleaseEntity( title="something", contribs=[ReleaseContrib(raw_name="somebody")], - ext_ids=ReleaseExtIds()) + ext_ids=ReleaseExtIds(), + ) api.create_release(eg.editgroup_id, r1) with pytest.raises(fatcat_openapi_client.rest.ApiException): r2 = ReleaseEntity(title="", ext_ids=ReleaseExtIds()) api.create_release(eg.editgroup_id, r2) with pytest.raises(fatcat_openapi_client.rest.ApiException): - r2 = ReleaseEntity(title="something", contribs=[ReleaseContrib(raw_name="")], ext_ids=ReleaseExtIds()) + r2 = ReleaseEntity( + title="something", contribs=[ReleaseContrib(raw_name="")], ext_ids=ReleaseExtIds() + ) api.create_release(eg.editgroup_id, r2) + def test_controlled_vocab(api): eg = quick_eg(api) - r1 = ReleaseEntity(title="something", release_type="journal-thingie", ext_ids=ReleaseExtIds()) + r1 = ReleaseEntity( + title="something", release_type="journal-thingie", ext_ids=ReleaseExtIds() + ) with pytest.raises(fatcat_openapi_client.rest.ApiException): 
api.create_release(eg.editgroup_id, r1) r1.release_type = "article" api.create_release(eg.editgroup_id, r1) - r2 = ReleaseEntity(title="something else", release_stage="pre-print", ext_ids=ReleaseExtIds()) + r2 = ReleaseEntity( + title="something else", release_stage="pre-print", ext_ids=ReleaseExtIds() + ) with pytest.raises(fatcat_openapi_client.rest.ApiException): api.create_release(eg.editgroup_id, r2) r2.release_stage = "published" api.create_release(eg.editgroup_id, r2) - r3 = ReleaseEntity(title="something else", withdrawn_status="boondogle", ext_ids=ReleaseExtIds()) + r3 = ReleaseEntity( + title="something else", withdrawn_status="boondogle", ext_ids=ReleaseExtIds() + ) with pytest.raises(fatcat_openapi_client.rest.ApiException): api.create_release(eg.editgroup_id, r3) r3.withdrawn_status = "spam" diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py index 510d398c..36bf40e9 100644 --- a/python/tests/api_webcaptures.py +++ b/python/tests/api_webcaptures.py @@ -1,4 +1,3 @@ - import datetime import pytest @@ -13,13 +12,13 @@ def test_webcapture(api): r1edit = api.create_release(eg.editgroup_id, r1) wc1 = WebcaptureEntity( - original_url = "http://example.site", - #timestamp = "2012-01-02T03:04:05Z", - timestamp = datetime.datetime.now(datetime.timezone.utc), - cdx = [ + original_url="http://example.site", + # timestamp = "2012-01-02T03:04:05Z", + timestamp=datetime.datetime.now(datetime.timezone.utc), + cdx=[ WebcaptureCdxLine( surt="site,example,)/data/thing.tar.gz", - #timestamp="2012-01-02T03:04:05Z", + # timestamp="2012-01-02T03:04:05Z", timestamp=datetime.datetime.now(datetime.timezone.utc), url="http://example.site/data/thing.tar.gz", mimetype="application/gzip", @@ -30,7 +29,7 @@ def test_webcapture(api): ), WebcaptureCdxLine( surt="site,example,)/README.md", - #timestamp="2012-01-02T03:04:05Z", + # timestamp="2012-01-02T03:04:05Z", timestamp=datetime.datetime.now(datetime.timezone.utc), url="http://example.site/README.md", mimetype="text/markdown", @@ -40,10 +39,10 @@ def test_webcapture(api): sha256="429bcafa4d3d0072d5b2511e12c85c1aac1d304011d1c406da14707f7b9cd905", ), ], - archive_urls = [ + archive_urls=[ FileUrl(rel="wayback", url="https://web.archive.org/web/"), ], - release_ids = [r1edit.ident], + release_ids=[r1edit.ident], extra=dict(c=1, b=2), edit_extra=dict(test_key="webcaptures rule"), ) @@ -61,9 +60,9 @@ def test_webcapture(api): # check that fields match # I don't know why these aren't equal... 
- #print(wc1.archive_urls) - #print(wc2.archive_urls) - #assert wc1.archive_urls == wc2.archive_urls + # print(wc1.archive_urls) + # print(wc2.archive_urls) + # assert wc1.archive_urls == wc2.archive_urls assert wc1.archive_urls[0].rel == wc2.archive_urls[0].rel assert wc1.archive_urls[0].url == wc2.archive_urls[0].url assert wc1.cdx[0] == wc2.cdx[0] @@ -89,14 +88,15 @@ def test_webcapture(api): wc2 = api.get_webcapture(wc2.ident) assert wc2.state == "deleted" + def test_webcapture_examples(api): - wc3 = api.get_webcapture('aaaaaaaaaaaaa53xaaaaaaaaam') + wc3 = api.get_webcapture("aaaaaaaaaaaaa53xaaaaaaaaam") assert wc3.releases is None - wc3 = api.get_webcapture('aaaaaaaaaaaaa53xaaaaaaaaam', expand="releases") + wc3 = api.get_webcapture("aaaaaaaaaaaaa53xaaaaaaaaam", expand="releases") - assert wc3.cdx[0].surt == 'org,asheesh)/' - assert wc3.cdx[1].sha1 == 'a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5' - assert wc3.archive_urls[1].rel == 'warc' + assert wc3.cdx[0].surt == "org,asheesh)/" + assert wc3.cdx[1].sha1 == "a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5" + assert wc3.archive_urls[1].rel == "warc" assert wc3.releases[0].ident assert wc3.releases[0].abstracts is None assert wc3.releases[0].refs is None @@ -110,44 +110,60 @@ def test_bad_webcapture(api): good = WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - cdx=[WebcaptureCdxLine( - surt="site,example,)/123.jpg", - url="http://example.site/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - timestamp="2012-01-02T03:04:05Z")]) + cdx=[ + WebcaptureCdxLine( + surt="site,example,)/123.jpg", + url="http://example.site/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + timestamp="2012-01-02T03:04:05Z", + ) + ], + ) bad_list = [ # uncomment to "test the test" - #good, + # good, # CDX timestamp format WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - cdx=[WebcaptureCdxLine( - surt="site,example,)/123.jpg", - url="http://example.site/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - size=123, - timestamp="20120102030405")]), + cdx=[ + WebcaptureCdxLine( + surt="site,example,)/123.jpg", + url="http://example.site/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + size=123, + timestamp="20120102030405", + ) + ], + ), # CDX timestamp format (int) WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - cdx=[WebcaptureCdxLine( - surt="site,example,)/123.jpg", - url="http://example.site/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - timestamp=20120102030405)]), + cdx=[ + WebcaptureCdxLine( + surt="site,example,)/123.jpg", + url="http://example.site/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + timestamp=20120102030405, + ) + ], + ), # negative size WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - cdx=[WebcaptureCdxLine( - surt="site,example,)/123.jpg", - url="http://example.site/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - size=-123, - timestamp="20120102030405")]), + cdx=[ + WebcaptureCdxLine( + surt="site,example,)/123.jpg", + url="http://example.site/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + size=-123, + timestamp="20120102030405", + ) + ], + ), ] api.create_webcapture(eg.editgroup_id, good) @@ -160,21 +176,27 @@ def test_bad_webcapture(api): WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - 
cdx=[WebcaptureCdxLine( - #url="http://example.site/123.jpg", - surt="site,example,)/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - timestamp="2012-01-02T03:04:05Z", - )]) + cdx=[ + WebcaptureCdxLine( + # url="http://example.site/123.jpg", + surt="site,example,)/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + timestamp="2012-01-02T03:04:05Z", + ) + ], + ) with pytest.raises(ValueError): # missing/empty CDX timestamp WebcaptureEntity( original_url="http://example.site/123.jpg", timestamp="2012-01-02T03:04:05Z", - cdx=[WebcaptureCdxLine( - url="http://example.site/123.jpg", - surt="site,example,)/123.jpg", - sha1="455face3598611458efe1f072e58624790a67266", - #timestamp="2012-01-02T03:04:05Z", - )]) + cdx=[ + WebcaptureCdxLine( + url="http://example.site/123.jpg", + surt="site,example,)/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + # timestamp="2012-01-02T03:04:05Z", + ) + ], + ) diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py index 1d57dfeb..2aeb28ea 100644 --- a/python/tests/citation_efficiency.py +++ b/python/tests/citation_efficiency.py @@ -1,4 +1,3 @@ - from fatcat_openapi_client import * from fixtures import * @@ -34,6 +33,7 @@ def test_citation_indexing(api): assert r1.refs[1].index == 1 assert r1.refs[1].key == "third" + def test_citation_targets(api): # invariant to linking citations # also, updates work @@ -56,12 +56,13 @@ def test_citation_targets(api): r2 = api.get_release(r2.ident) assert r2.refs[0].key == "first" assert r2.refs[1].key == "second" - assert r2.refs[0].index == 0 # TODO: one-indexing? + assert r2.refs[0].index == 0 # TODO: one-indexing? assert r2.refs[1].index == 1 assert r2.refs[0].target_release_id is None assert r2.refs[1].target_release_id == r1.ident assert len(r2.refs) == 2 + def test_citation_empty_array(api): # distinction between empty array (no citations) and no array (hidden) @@ -83,17 +84,24 @@ def test_citation_empty_array(api): r1b = api.get_release(r1.ident, hide="refs") assert r1b.refs is None + def test_citation_encoding(api): # escape-only changes (eg, \u1234 whatever for ASCII) r1 = ReleaseEntity(title="citation encoding", ext_ids=ReleaseExtIds()) - title = "title-unicode \\u0050 \\\" " + title = 'title-unicode \\u0050 \\" ' container = "container-unicode ☃︎ ä ö ü スティー" - extra = extra={'a': 1, 'b': 2, 'ö': 3} + extra = extra = {"a": 1, "b": 2, "ö": 3} locator = "p123" r1.refs = [ - ReleaseRef(key="1", year=1923, title=title, container_name=container, - extra=extra, locator=locator), + ReleaseRef( + key="1", + year=1923, + title=title, + container_name=container, + extra=extra, + locator=locator, + ), ReleaseRef(key="2"), ] diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py index c8b935bb..34a00ebb 100644 --- a/python/tests/clean_files.py +++ b/python/tests/clean_files.py @@ -1,4 +1,3 @@ - import copy import pytest @@ -12,6 +11,7 @@ from fatcat_tools.cleanups import FileCleaner def file_cleaner(api): yield FileCleaner(api) + def test_url_cleanups(file_cleaner): f = FileEntity( @@ -20,41 +20,62 @@ def test_url_cleanups(file_cleaner): ) f.urls = [ - FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"), - FileUrl(url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"), - FileUrl(url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"), + FileUrl( + url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive" + ), + FileUrl( + 
url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive" + ), + FileUrl( + url="https://archive.org/details/None/something.com/blah.pdf", rel="repository" + ), ] f = file_cleaner.clean_entity(f) # remove None wayback links assert len(f.urls) == 2 for u in f.urls: - assert 'web/None' not in u.url + assert "web/None" not in u.url assert f == file_cleaner.clean_entity(f) assert f == file_cleaner.clean_entity(copy.deepcopy(f)) # rel=repository -> rel=archive for archive.org links - assert f.urls[1].rel == 'archive' + assert f.urls[1].rel == "archive" # short wayback dates f.urls = [ - FileUrl(url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"), - FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"), + FileUrl( + url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", + rel="webarchive", + ), + FileUrl( + url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", + rel="webarchive", + ), ] f = file_cleaner.clean_entity(f) assert len(f.urls) == 1 - assert f.urls[0].url == 'http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf' + assert ( + f.urls[0].url + == "http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf" + ) assert f == file_cleaner.clean_entity(f) assert f == file_cleaner.clean_entity(copy.deepcopy(f)) f.urls = [ - FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"), + FileUrl( + url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", + rel="webarchive", + ), ] f = file_cleaner.clean_entity(f) assert len(f.urls) == 1 - assert f.urls[0].url == 'http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf' + assert ( + f.urls[0].url + == "http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf" + ) assert f == file_cleaner.clean_entity(f) assert f == file_cleaner.clean_entity(copy.deepcopy(f)) diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index a9b81bda..0950087f 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -1,4 +1,3 @@ - import elasticsearch import fatcat_openapi_client import pytest @@ -9,73 +8,77 @@ import fatcat_web from fatcat_tools import authenticated_api ES_CONTAINER_STATS_RESP = { - 'timed_out': False, - 'aggregations': { - 'container_stats': {'buckets': { - 'is_preserved': {'doc_count': 461939}, - 'in_kbart': {'doc_count': 461939}, - 'in_web': {'doc_count': 2797}, - }}, - 'preservation': { - 'buckets': [ - {'key': 'bright', 'doc_count': 444}, - {'key': 'dark', 'doc_count': 111}, + "timed_out": False, + "aggregations": { + "container_stats": { + "buckets": { + "is_preserved": {"doc_count": 461939}, + "in_kbart": {"doc_count": 461939}, + "in_web": {"doc_count": 2797}, + } + }, + "preservation": { + "buckets": [ + {"key": "bright", "doc_count": 444}, + {"key": "dark", "doc_count": 111}, ], - 'sum_other_doc_count': 0, + "sum_other_doc_count": 0, }, - 'release_type': { - 'buckets': [ - {'key': 'article-journal', 'doc_count': 456}, - {'key': 'book', 'doc_count': 123}, + "release_type": { + "buckets": [ + {"key": "article-journal", "doc_count": 456}, + {"key": "book", 
"doc_count": 123}, ], - 'sum_other_doc_count': 0, + "sum_other_doc_count": 0, }, }, - 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 50 + "hits": {"total": 461939, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 50, } # TODO: this should not be empty ES_CONTAINER_RANDOM_RESP = { - 'timed_out': False, - 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 50 + "timed_out": False, + "hits": {"total": 461939, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 50, } ES_RELEASE_EMPTY_RESP = { - 'timed_out': False, - 'hits': {'total': 0, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 50 + "timed_out": False, + "hits": {"total": 0, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 50, } + @pytest.fixture def full_app(mocker): load_dotenv(dotenv_path="./example.env") fatcat_web.app.testing = True fatcat_web.app.debug = False - fatcat_web.app.config['WTF_CSRF_ENABLED'] = False + fatcat_web.app.config["WTF_CSRF_ENABLED"] = False # mock out ES client requests, so they at least fail fast fatcat_web.app.es_client = elasticsearch.Elasticsearch("mockbackend") - mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") return fatcat_web.app + @pytest.fixture def app(full_app): return full_app.test_client() + @pytest.fixture def app_admin(app): ADMIN_DEV_TOKEN = "AgEPZGV2LmZhdGNhdC53aWtpAhYyMDE5MDEwMS1kZXYtZHVtbXkta2V5AAImZWRpdG9yX2lkID0gYWFhYWFhYWFhYWFhYmt2a2FhYWFhYWFhYWkAAht0aW1lID4gMjAxOS0wNC0wNFQyMzozMjo0NloAAAYgrN3jjy0mgEqIydTFfsOLYSS55dz6Fh2d1CGMNQFLwcQ=" - rv = app.post('/auth/token_login', - data=dict(token=ADMIN_DEV_TOKEN), - follow_redirects=True) + rv = app.post("/auth/token_login", data=dict(token=ADMIN_DEV_TOKEN), follow_redirects=True) assert rv.status_code == 200 return app + @pytest.fixture def api(): load_dotenv(dotenv_path="./example.env") @@ -83,6 +86,7 @@ def api(): api_client.editor_id = "aaaaaaaaaaaabkvkaaaaaaaaae" return api_client + @pytest.fixture def api_dummy_entities(api): """ @@ -121,13 +125,16 @@ def api_dummy_entities(api): "work": w1, } + def test_get_changelog_entry(api): """Check that fixture is working""" cl = api.get_changelog_entry(1) assert cl + ## Helpers ################################################################## + def quick_eg(api_inst): eg = api_inst.create_editgroup(fatcat_openapi_client.Editgroup()) return eg diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py index fb43e3f6..12ad34fe 100644 --- a/python/tests/harvest_crossref.py +++ b/python/tests/harvest_crossref.py @@ -1,4 +1,3 @@ - import datetime import json @@ -12,13 +11,14 @@ def test_crossref_harvest_date(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") # mock day request to crossref API - with open('tests/files/crossref_api_works.json', 'r') as f: + with open("tests/files/crossref_api_works.json", "r") as f: crossref_resp = 
json.loads(f.readline()) - responses.add(responses.GET, 'https://api.crossref.org/works', - json=crossref_resp, status=200) + responses.add( + responses.GET, "https://api.crossref.org/works", json=crossref_resp, status=200 + ) harvester = HarvestCrossrefWorker( kafka_hosts="dummy", @@ -34,7 +34,7 @@ def test_crossref_harvest_date(mocker): assert len(responses.calls) == 1 # ensure email was included in User-Agent - assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent'] + assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers["User-Agent"] # check that correct date param was passed as expected assert "filter=from-update-date%3A2019-02-03" in responses.calls[0].request.url diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py index 0a7584bd..40e5f711 100644 --- a/python/tests/harvest_datacite.py +++ b/python/tests/harvest_datacite.py @@ -1,4 +1,3 @@ - import datetime import json @@ -12,13 +11,12 @@ def test_datacite_harvest_date(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") # mock day request to datacite API - with open('tests/files/datacite_api.json', 'r') as f: + with open("tests/files/datacite_api.json", "r") as f: resp = json.loads(f.readline()) - responses.add(responses.GET, 'https://api.datacite.org/dois', - json=resp, status=200) + responses.add(responses.GET, "https://api.datacite.org/dois", json=resp, status=200) harvester = HarvestDataciteWorker( kafka_hosts="dummy", @@ -34,10 +32,13 @@ def test_datacite_harvest_date(mocker): assert len(responses.calls) == 1 # ensure email was included in User-Agent - assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent'] + assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers["User-Agent"] # check that correct date param was passed as expected - assert "query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D" in responses.calls[0].request.url + assert ( + "query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D" + in responses.calls[0].request.url + ) # check that we published the expected number of DOI objects were published # to the (mock) kafka topic diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py index aba766ba..422870f2 100644 --- a/python/tests/harvest_pubmed.py +++ b/python/tests/harvest_pubmed.py @@ -14,20 +14,20 @@ def test_pubmed_harvest_date(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") # Mocking a file fetched from FTP, should contain some 'PubmedArticle' elements. # $ zcat tests/files/pubmedsample_2019.xml.gz | grep -c '<PubmedArticle>' # 176 - file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_2019.xml.gz') - ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr') + file_to_retrieve = os.path.join(os.path.dirname(__file__), "files/pubmedsample_2019.xml.gz") + ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr") ftpretr.return_value = file_to_retrieve - test_date = '2020-02-20' + test_date = "2020-02-20" # We'll need one entry in the date_file_map. 
- generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map') - generate_date_file_map.return_value = {test_date: set(['dummy'])} + generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map") + generate_date_file_map.return_value = {test_date: set(["dummy"])} # For cleanup. os.remove = mocker.Mock() @@ -41,7 +41,7 @@ def test_pubmed_harvest_date(mocker): harvester.producer = mocker.Mock() harvester.date_file_map = generate_date_file_map() # Since we mock out the FTP fetch, the concrete date does not matter here. - harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) + harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d")) # check that we published the expected number of DOI objects were published # to the (mock) kafka topic @@ -49,20 +49,23 @@ def test_pubmed_harvest_date(mocker): assert harvester.producer.flush.call_count == 1 assert os.remove.call_count == 2 + def test_pubmed_harvest_date_no_pmid(mocker): # mock out the harvest state object so it doesn't try to actually connect # to Kafka - mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka') + mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka") - file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_no_pmid_2019.xml.gz') - ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr') + file_to_retrieve = os.path.join( + os.path.dirname(__file__), "files/pubmedsample_no_pmid_2019.xml.gz" + ) + ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr") ftpretr.return_value = file_to_retrieve - test_date = '2020-02-20' + test_date = "2020-02-20" # We'll need one entry in the date_file_map. - generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map') - generate_date_file_map.return_value = {test_date: set(['dummy'])} + generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map") + generate_date_file_map.return_value = {test_date: set(["dummy"])} harvester = PubmedFTPWorker( kafka_hosts="dummy", @@ -74,4 +77,4 @@ def test_pubmed_harvest_date_no_pmid(mocker): # The file has not PMID, not importable. 
with pytest.raises(ValueError): - harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) + harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d")) diff --git a/python/tests/harvest_state.py b/python/tests/harvest_state.py index ecd276ed..039794b9 100644 --- a/python/tests/harvest_state.py +++ b/python/tests/harvest_state.py @@ -1,4 +1,3 @@ - import datetime from fatcat_tools.harvest import * @@ -18,21 +17,21 @@ def test_harvest_state(): assert hs.next_span() is None hs = HarvestState( - start_date=datetime.date(2000,1,1), - end_date=datetime.date(2000,1,3), + start_date=datetime.date(2000, 1, 1), + end_date=datetime.date(2000, 1, 3), ) assert len(hs.to_process) == 3 hs = HarvestState( - start_date=datetime.date(2000,1,29), - end_date=datetime.date(2000,2,2), + start_date=datetime.date(2000, 1, 29), + end_date=datetime.date(2000, 2, 2), ) assert len(hs.to_process) == 5 hs = HarvestState(catchup_days=0) assert hs.next_span() is None hs.enqueue_period( - start_date=datetime.date(2000,1,1), - end_date=datetime.date(2000,1,3), + start_date=datetime.date(2000, 1, 1), + end_date=datetime.date(2000, 1, 3), ) assert len(hs.to_process) == 3 hs.update('{"completed-date": "2000-01-02"}') diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py index 6f73d817..90e6d05b 100644 --- a/python/tests/import_arabesque.py +++ b/python/tests/import_arabesque.py @@ -1,4 +1,3 @@ - import json import pytest @@ -11,41 +10,51 @@ from fatcat_tools.importers import ArabesqueMatchImporter, JsonLinePusher, Sqlit def arabesque_importer(api): yield ArabesqueMatchImporter(api, extid_type="doi", crawl_id="DUMMY123") + # TODO: use API to check that entities actually created... def test_arabesque_importer_basic(arabesque_importer): - SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run() + SqlitePusher( + arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result" + ).run() + def test_arabesque_importer_json(arabesque_importer): - with open('tests/files/arabesque_example.json', 'r') as f: + with open("tests/files/arabesque_example.json", "r") as f: JsonLinePusher(arabesque_importer, f).run() + def test_arabesque_importer(arabesque_importer): last_index = arabesque_importer.api.get_changelog(limit=1)[0].index arabesque_importer.bezerk_mode = True - counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 490 + counts = SqlitePusher( + arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result" + ).run() + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 490 # fetch most recent editgroup - change = arabesque_importer.api.get_changelog_entry(index=last_index+1) + change = arabesque_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "identifier/url seedlist" in eg.description.lower() - assert eg.extra['git_rev'] - assert eg.extra['crawl_id'] == "DUMMY123" - assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert eg.extra["crawl_id"] == "DUMMY123" + assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra["agent"] # re-insert; should skip arabesque_importer.reset() arabesque_importer.bezerk_mode = False - counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run() - assert 
counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 490 + counts = SqlitePusher( + arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result" + ).run() + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 490 + def test_arabesque_dict_parse(arabesque_importer): - with open('tests/files/arabesque_example.json', 'r') as f: + with open("tests/files/arabesque_example.json", "r") as f: raw = json.loads(f.readline()) f = arabesque_importer.parse_record(raw) assert f.sha1 == "bdd78be55800bb1c9a5e47005bac5e4124793c7b" diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index 9368fb06..2a297c00 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -1,4 +1,3 @@ - import pytest from bs4 import BeautifulSoup from fixtures import * @@ -12,35 +11,37 @@ def arxiv_importer(api): ari._test_override = True return ari + def test_arxiv_importer(arxiv_importer): last_index = arxiv_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/arxivraw_1810.09584.xml', 'r') as f: + with open("tests/files/arxivraw_1810.09584.xml", "r") as f: arxiv_importer.bezerk_mode = True counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run() - assert counts['insert'] == 2 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 2 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = arxiv_importer.api.get_changelog_entry(index=last_index+1) + change = arxiv_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "arxiv" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.ArxivRawImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.ArxivRawImporter" in eg.extra["agent"] last_index = arxiv_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/arxivraw_1810.09584.xml', 'r') as f: + with open("tests/files/arxivraw_1810.09584.xml", "r") as f: arxiv_importer.bezerk_mode = False arxiv_importer.reset() counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run() - assert counts['insert'] == 0 - assert counts['exists'] == 2 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 2 + assert counts["skip"] == 0 assert last_index == arxiv_importer.api.get_changelog(limit=1)[0].index + def test_arxiv_xml_parse(arxiv_importer): - with open('tests/files/arxivraw_1810.09584.xml', 'r') as f: + with open("tests/files/arxivraw_1810.09584.xml", "r") as f: soup = BeautifulSoup(f, "xml") r = arxiv_importer.parse_record(soup.find_all("record")[0]) @@ -67,11 +68,15 @@ def test_arxiv_xml_parse(arxiv_importer): assert r2.release_year == 2019 assert str(r2.release_date) == "2019-01-13" # matched by ISSN, so shouldn't be in there? 
- #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" + # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" assert len(r1.contribs) == 4 - assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an'] - assert r1.extra['arxiv']['base_id'] == '1810.09584' - assert r1.extra['superceded'] is True + assert r1.extra["arxiv"]["categories"] == [ + "cond-mat.stat-mech", + "physics.bio-ph", + "physics.data-an", + ] + assert r1.extra["arxiv"]["base_id"] == "1810.09584" + assert r1.extra["superceded"] is True assert r1.contribs[0].raw_name == "Raphael Chetrite" assert r1.contribs[0].role == "author" @@ -90,11 +95,15 @@ def test_arxiv_xml_parse(arxiv_importer): assert r1.abstracts == r2.abstracts - assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures" - assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"] + assert r1.extra["arxiv"]["comments"] == "7 pages, 2 figures" + assert r1.extra["arxiv"]["categories"] == [ + "cond-mat.stat-mech", + "physics.bio-ph", + "physics.data-an", + ] - assert not r2.extra.get('superceded') - r2.extra['superceded'] = True + assert not r2.extra.get("superceded") + r2.extra["superceded"] = True assert r1.extra == r2.extra assert not r1.refs diff --git a/python/tests/import_common.py b/python/tests/import_common.py index 2b7c935a..cd89f914 100644 --- a/python/tests/import_common.py +++ b/python/tests/import_common.py @@ -1,4 +1,3 @@ - import datetime import json from typing import Any @@ -17,15 +16,16 @@ from fatcat_tools.transforms import entity_to_dict @pytest.fixture(scope="function") def entity_importer(api, mocker) -> Any: es_client = elasticsearch.Elasticsearch("mockbackend") - mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") yield EntityImporter(api, es_client=es_client) + def test_fuzzy_match_none(entity_importer, mocker) -> None: """ Simple ES-mocked test for "no search results" case """ - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)), (200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)), @@ -39,6 +39,7 @@ def test_fuzzy_match_none(entity_importer, mocker) -> None: resp = entity_importer.match_existing_release_fuzzy(release) assert resp is None + def test_fuzzy_match_different(entity_importer, mocker) -> None: """ Simple fuzzycat-mocked test for "strong match" case @@ -60,7 +61,7 @@ def test_fuzzy_match_different(entity_importer, mocker) -> None: ext_ids=ReleaseExtIds(), ) - match_raw = mocker.patch('fatcat_tools.importers.common.match_release_fuzzy') + match_raw = mocker.patch("fatcat_tools.importers.common.match_release_fuzzy") match_raw.side_effect = [[r3, r2, r3, r2]] resp = entity_importer.match_existing_release_fuzzy(r1) assert (resp[0], resp[2]) == ("STRONG", r2) diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index a8a1ec44..eb931eb1 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -1,4 +1,3 @@ - import gzip import json @@ -10,67 +9,79 @@ from fatcat_tools.importers import CrossrefImporter, JsonLinePusher @pytest.fixture(scope="function") def crossref_importer(api): - with 
open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield CrossrefImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True + ) + @pytest.fixture(scope="function") def crossref_importer_existing(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield CrossrefImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False + ) + -@pytest.mark.skip(reason="slow/huge crossref import is a corner-case and slows tests significantly") +@pytest.mark.skip( + reason="slow/huge crossref import is a corner-case and slows tests significantly" +) def test_crossref_importer_huge(crossref_importer): last_index = crossref_importer.api.get_changelog(limit=1)[0].index - with gzip.open('tests/files/huge_crossref_doi.json.gz', 'rt') as f: + with gzip.open("tests/files/huge_crossref_doi.json.gz", "rt") as f: crossref_importer.bezerk_mode = True line = f.readline() mega_blob = [line for i in range(95)] counts = JsonLinePusher(crossref_importer, mega_blob).run() - assert counts['insert'] == 95 - change = crossref_importer.api.get_changelog_entry(index=last_index+1) + assert counts["insert"] == 95 + change = crossref_importer.api.get_changelog_entry(index=last_index + 1) release = crossref_importer.api.get_release(change.editgroup.edits.releases[0].ident) assert len(release.contribs) == 1014 + def test_crossref_importer(crossref_importer): last_index = crossref_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f: crossref_importer.bezerk_mode = True counts = JsonLinePusher(crossref_importer, f).run() - assert counts['insert'] == 14 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 14 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = crossref_importer.api.get_changelog_entry(index=last_index+1) + change = crossref_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "crossref" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.CrossrefImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.CrossrefImporter" in eg.extra["agent"] last_index = crossref_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f: crossref_importer.bezerk_mode = False crossref_importer.reset() counts = JsonLinePusher(crossref_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 14 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 14 + assert counts["skip"] == 0 assert last_index == crossref_importer.api.get_changelog(limit=1)[0].index + def test_crossref_mappings(crossref_importer): - assert crossref_importer.map_release_type('journal-article') == "article-journal" - assert crossref_importer.map_release_type('asdf') is None - assert 
crossref_importer.map_release_type('book-series') is None - assert crossref_importer.map_release_type('standard') == 'standard' + assert crossref_importer.map_release_type("journal-article") == "article-journal" + assert crossref_importer.map_release_type("asdf") is None + assert crossref_importer.map_release_type("book-series") is None + assert crossref_importer.map_release_type("standard") == "standard" + def test_crossref_importer_create(crossref_importer): crossref_importer.create_containers = True - with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f: JsonLinePusher(crossref_importer, f).run() + def test_crossref_dict_parse(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) @@ -78,7 +89,10 @@ def test_crossref_dict_parse(crossref_importer): JsonLinePusher(crossref_importer, [json.dumps(raw)]).run() print(r.extra) - assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" + assert ( + r.title + == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" + ) assert r.publisher == "Wiley-Blackwell" assert r.release_type == "article-journal" assert r.release_stage == "published" @@ -88,22 +102,22 @@ def test_crossref_dict_parse(crossref_importer): assert r.ext_ids.isbn13 == "978-3-16-148410-0" assert r.language == "fr" assert r.subtitle is None - assert 'subtitle' not in r.extra - assert 'subtitle' not in r.extra['crossref'] - assert 'funder' not in r.extra - assert 'funder' not in r.extra['crossref'] + assert "subtitle" not in r.extra + assert "subtitle" not in r.extra["crossref"] + assert "funder" not in r.extra + assert "funder" not in r.extra["crossref"] # matched by ISSN, so shouldn't be in there - #assert extra['container_name'] == "International Journal of Quantum Chemistry" - assert r.extra['aliases'] == ["some other title"] - assert r.extra['crossref']['archive'] == ['Portico', 'LOCKSS'] + # assert extra['container_name'] == "International Journal of Quantum Chemistry" + assert r.extra["aliases"] == ["some other title"] + assert r.extra["crossref"]["archive"] == ["Portico", "LOCKSS"] assert len(r.contribs) == 6 assert r.contribs[0].raw_name == "Marcelo D. Radicioni" assert r.contribs[0].given_name == "Marcelo D." assert r.contribs[0].surname == "Radicioni" assert r.contribs[0].index == 0 - assert r.contribs[0].extra['seq'] == "first" + assert r.contribs[0].extra["seq"] == "first" assert r.contribs[1].raw_affiliation == "Some University" - assert r.contribs[1].extra['more_affiliations'] == ["Some Department"] + assert r.contribs[1].extra["more_affiliations"] == ["Some Department"] assert r.contribs[1].role == "author" assert r.contribs[4].role == "editor" assert r.contribs[4].index is None @@ -115,32 +129,47 @@ def test_crossref_dict_parse(crossref_importer): assert r.refs[0].year == 1972 assert r.refs[0].locator == "1734" assert r.refs[0].container_name == "J. Chem. Phys." 
- assert r.refs[0].extra == {"volume": "57", "authors": ["Swenson"], "doi": "10.1063/1.1678462", "medium": "DVD"} - assert r.refs[2].key == 'BIB3' - assert r.refs[2].extra.get('author') is None - assert r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3" - assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry" + assert r.refs[0].extra == { + "volume": "57", + "authors": ["Swenson"], + "doi": "10.1063/1.1678462", + "medium": "DVD", + } + assert r.refs[2].key == "BIB3" + assert r.refs[2].extra.get("author") is None + assert ( + r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3" + ) + assert ( + r.refs[3].container_name + == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry" + ) + def test_crossref_subtitle(crossref_importer): """ Tests new subtitle field, explicitly """ - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) - raw['subtitle'] = ["some bogus subtitle", "blah"] + raw["subtitle"] = ["some bogus subtitle", "blah"] r = crossref_importer.parse_record(raw) # ensure the API server is ok with format JsonLinePusher(crossref_importer, [json.dumps(raw)]).run() print(r.extra) - assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" + assert ( + r.title + == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" + ) assert r.subtitle == "some bogus subtitle" - assert 'subtitle' not in r.extra - assert 'subtitle' not in r.extra['crossref'] + assert "subtitle" not in r.extra + assert "subtitle" not in r.extra["crossref"] + def test_stateful_checking(crossref_importer_existing): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line, a whole document raw = f.read() # might not exist yet... 
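The crossref tests above follow the pattern repeated in every importer module this commit touches: build an importer from a fixture, push a sample file through a line pusher, assert on the returned counts ("insert" / "exists" / "skip"), then re-run with bezerk_mode off to confirm the second pass only finds existing entities. The following is a minimal, self-contained sketch of that counts contract; DummyImporter and SampleLinePusher are invented stand-ins that only mirror the interface visible in the diff, not the real fatcat_tools classes.

# Illustrative stand-ins, not fatcat_tools code: they only model the
# counts contract ("insert" / "exists" / "skip") the tests assert on.
import json
from collections import Counter
from typing import IO, Optional


class DummyImporter:
    """Pretend importer that deduplicates records on a 'doi' field."""

    def __init__(self, bezerk_mode: bool = False) -> None:
        # bezerk_mode=True means "always insert, never check for an existing entity"
        self.bezerk_mode = bezerk_mode
        self._seen: set = set()
        self.counts: Counter = Counter()

    def push_record(self, record: dict) -> None:
        doi: Optional[str] = record.get("doi")
        if not doi:
            self.counts["skip"] += 1
        elif not self.bezerk_mode and doi in self._seen:
            self.counts["exists"] += 1
        else:
            self._seen.add(doi)
            self.counts["insert"] += 1

    def finish(self) -> Counter:
        return self.counts


class SampleLinePusher:
    """Feeds one JSON object per line into an importer and returns its counts."""

    def __init__(self, importer: DummyImporter, source: IO) -> None:
        self.importer = importer
        self.source = source

    def run(self) -> Counter:
        for line in self.source:
            if line.strip():
                self.importer.push_record(json.loads(line))
        return self.importer.finish()


if __name__ == "__main__":
    import io

    sample = '{"doi": "10.123/abc"}\n{"doi": "10.123/abc"}\n{"title": "no doi"}\n'
    counts = SampleLinePusher(DummyImporter(bezerk_mode=False), io.StringIO(sample)).run()
    assert counts == Counter(insert=1, exists=1, skip=1)

Running the same input a second time without bezerk_mode flips the counts from insert to exists, which is the re-import (idempotency) assertion each test module repeats.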
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index e1c79bc4..220dc0f6 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -54,9 +54,7 @@ def test_datacite_importer_huge(datacite_importer): counts = JsonLinePusher(datacite_importer, f).run() assert counts["insert"] == 998 change = datacite_importer.api.get_changelog_entry(index=last_index + 1) - release = datacite_importer.api.get_release( - change.editgroup.edits.releases[0].ident - ) + release = datacite_importer.api.get_release(change.editgroup.edits.releases[0].ident) assert len(release.contribs) == 3 @@ -76,17 +74,26 @@ def test_find_original_language_title(): ), Case( "empty subdict is ignored", - {"title": "Noise Reduction", "original_language_title": {},}, + { + "title": "Noise Reduction", + "original_language_title": {}, + }, None, ), Case( "unknown subdict keys are ignored", - {"title": "Noise Reduction", "original_language_title": {"broken": "kv"},}, + { + "title": "Noise Reduction", + "original_language_title": {"broken": "kv"}, + }, None, ), Case( "original string", - {"title": "Noise Reduction", "original_language_title": "Подавление шума",}, + { + "title": "Noise Reduction", + "original_language_title": "Подавление шума", + }, "Подавление шума", ), Case( @@ -163,7 +170,10 @@ def test_parse_datacite_titles(): ), Case( "multiple titles, first wins", - [{"title": "Total carbon dioxide"}, {"title": "Meeting Heterogeneity"},], + [ + {"title": "Total carbon dioxide"}, + {"title": "Meeting Heterogeneity"}, + ], ("Total carbon dioxide", None, None), ), Case( @@ -201,7 +211,9 @@ def test_parse_datacite_titles(): [ { "title": "Total carbon dioxide", - "original_language_title": {"__content__": "Total carbon dioxide",}, + "original_language_title": { + "__content__": "Total carbon dioxide", + }, }, {"title": "Station TT043_7-9", "titleType": "Subtitle"}, ], @@ -239,9 +251,7 @@ def test_parse_datacite_dates(): Case("year only yields year only", [{"date": "2019"}], (None, None, 2019)), Case("int year", [{"date": 2019}], (None, None, 2019)), Case("first wins", [{"date": "2019"}, {"date": "2020"}], (None, None, 2019)), - Case( - "skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020) - ), + Case("skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020)), Case( "first with type", [{"date": "2019", "dateType": "Accepted"}, {"date": "2020"}], @@ -249,7 +259,9 @@ def test_parse_datacite_dates(): ), Case( "full date", - [{"date": "2019-12-01", "dateType": "Valid"},], + [ + {"date": "2019-12-01", "dateType": "Valid"}, + ], (datetime.date(2019, 12, 1), 12, 2019), ), Case( @@ -294,22 +306,30 @@ def test_parse_datacite_dates(): ), Case( "fuzzy year only", - [{"date": "Year 2010", "dateType": "Issued"},], + [ + {"date": "Year 2010", "dateType": "Issued"}, + ], (None, None, 2010), ), Case( "fuzzy year and month", - [{"date": "Year 2010 Feb", "dateType": "Issued"},], + [ + {"date": "Year 2010 Feb", "dateType": "Issued"}, + ], (None, 2, 2010), ), Case( "fuzzy year, month, day", - [{"date": "Year 2010 Feb 24", "dateType": "Issued"},], + [ + {"date": "Year 2010 Feb 24", "dateType": "Issued"}, + ], (datetime.date(2010, 2, 24), 2, 2010), ), Case( "ignore broken date", - [{"date": "Febrrr 45", "dateType": "Updated"},], + [ + {"date": "Febrrr 45", "dateType": "Updated"}, + ], (None, None, None), ), ] @@ -317,13 +337,19 @@ def test_parse_datacite_dates(): result = parse_datacite_dates(case.input) assert result == case.result, case.about + def 
test_datacite_spammy_title(datacite_importer): - r = datacite_importer.parse_record({"title": """HD! My Hero academia + r = datacite_importer.parse_record( + { + "title": """HD! My Hero academia Heroes: Rising [2020]Full Movie Watch Online And Free Download""", - "attributes": {"doi": "10.1234/1234"}}) + "attributes": {"doi": "10.1234/1234"}, + } + ) assert r is False + def test_datacite_importer(datacite_importer): last_index = datacite_importer.api.get_changelog(limit=1)[0].index with open("tests/files/datacite_sample.jsonl", "r") as f: @@ -361,9 +387,7 @@ def test_datacite_dict_parse(datacite_importer): print(r.extra) assert r.title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090" - assert ( - r.publisher == "International Centre for Agricultural Research in Dry Areas" - ) + assert r.publisher == "International Centre for Agricultural Research in Dry Areas" assert r.release_type == "article" assert r.release_stage == "published" assert r.license_slug is None @@ -424,9 +448,7 @@ def test_index_form_to_display_name(): ), Case("Solomon, P. M.", "P. M. Solomon"), Case("Sujeevan Ratnasingham", "Sujeevan Ratnasingham"), - Case( - "Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler" - ), + Case("Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler"), ] for c in cases: @@ -450,9 +472,7 @@ def test_lookup_license_slug(): "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml", "ADS-UK", ), - Case( - "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK" - ), + Case("https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK"), Case("https://creativecommons.org/public-domain/cc0", "CC-0"), Case("https://creativecommons.org/publicdomain/zero/1.0", "CC-0"), Case("https://creativecommons.org/share-your-work/public-domain/cc0", "CC-0"), diff --git a/python/tests/import_dblp.py b/python/tests/import_dblp.py index 4fb178b8..399523e3 100644 --- a/python/tests/import_dblp.py +++ b/python/tests/import_dblp.py @@ -1,4 +1,3 @@ - import io import pytest @@ -15,39 +14,45 @@ from fatcat_tools.importers import ( @pytest.fixture(scope="function") def dblp_importer(api): - with open('tests/files/dblp_container_map.tsv', 'r') as tsv_file: + with open("tests/files/dblp_container_map.tsv", "r") as tsv_file: yield DblpReleaseImporter(api, tsv_file, bezerk_mode=True) + @pytest.fixture(scope="function") def dblp_container_importer(api): - with open('tests/files/dblp_container_map.tsv', 'r') as tsv_file: - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield DblpContainerImporter(api, issn_file, tsv_file, io.StringIO(), bezerk_mode=True) + with open("tests/files/dblp_container_map.tsv", "r") as tsv_file: + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield DblpContainerImporter( + api, issn_file, tsv_file, io.StringIO(), bezerk_mode=True + ) + def test_dblp_importer(dblp_importer): last_index = dblp_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_dblp.xml', 'rb') as f: + with open("tests/files/example_dblp.xml", "rb") as f: dblp_importer.bezerk_mode = True - counts = Bs4XmlLargeFilePusher(dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True).run() - #print(counts) - assert counts['insert'] == 3 - assert counts['exists'] == 0 - assert counts['skip'] == 1 + counts = Bs4XmlLargeFilePusher( + dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True + ).run() + # print(counts) + assert counts["insert"] == 3 + assert counts["exists"] 
== 0 + assert counts["skip"] == 1 # fetch most recent editgroup - change = dblp_importer.api.get_changelog_entry(index=last_index+1) + change = dblp_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "dblp" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.DblpReleaseImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.DblpReleaseImporter" in eg.extra["agent"] # check that entity name mangling was fixed on import eg = dblp_importer.api.get_editgroup(eg.editgroup_id) release = dblp_importer.api.get_release(eg.edits.releases[0].ident) for r_edit in eg.edits.releases: release = dblp_importer.api.get_release(r_edit.ident) - #print(release.ext_ids.dblp) + # print(release.ext_ids.dblp) if release.ext_ids.dblp == "conf/er/Norrie08": break assert release.ext_ids.dblp == "conf/er/Norrie08" @@ -55,34 +60,37 @@ def test_dblp_importer(dblp_importer): assert release.contribs[1].raw_name == "Michael H. Böhlen" last_index = dblp_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_dblp.xml', 'rb') as f: + with open("tests/files/example_dblp.xml", "rb") as f: dblp_importer.bezerk_mode = False dblp_importer.reset() - counts = Bs4XmlLargeFilePusher(dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True).run() - #print(counts) - assert counts['insert'] == 0 - assert counts['exists'] == 3 - assert counts['skip'] == 1 + counts = Bs4XmlLargeFilePusher( + dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True + ).run() + # print(counts) + assert counts["insert"] == 0 + assert counts["exists"] == 3 + assert counts["skip"] == 1 assert last_index == dblp_importer.api.get_changelog(limit=1)[0].index + def test_dblp_container_importer(dblp_container_importer): last_index = dblp_container_importer.api.get_changelog(limit=1)[0].index output_tsv_map = io.StringIO() - with open('tests/files/example_dblp_containers.json', 'r') as f: + with open("tests/files/example_dblp_containers.json", "r") as f: dblp_container_importer.bezerk_mode = True dblp_container_importer.dblp_container_map_output = output_tsv_map counts = JsonLinePusher(dblp_container_importer, f).run() - assert counts['insert'] == 10 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 10 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = dblp_container_importer.api.get_changelog_entry(index=last_index+1) + change = dblp_container_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "dblp" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.DblpContainerImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.DblpContainerImporter" in eg.extra["agent"] # check that entity name mangling was fixed on import eg = dblp_container_importer.api.get_editgroup(eg.editgroup_id) @@ -93,34 +101,35 @@ def test_dblp_container_importer(dblp_container_importer): assert container.name == "Atlantis Thinking Machines" assert container.issnl == "1877-3273" assert container.container_type == "book-series" - assert container.extra['dblp']['prefix'] == "series/atlantis" - assert container.extra['urls'] == ["http://link.springer.com/bookseries/10077"] + assert container.extra["dblp"]["prefix"] == "series/atlantis" + assert container.extra["urls"] == ["http://link.springer.com/bookseries/10077"] last_index = 
dblp_container_importer.api.get_changelog(limit=1)[0].index output_tsv_map.seek(0) - #print(output_tsv_map.read()) - #output_tsv_map.seek(0) - with open('tests/files/example_dblp_containers.json', 'r') as f: + # print(output_tsv_map.read()) + # output_tsv_map.seek(0) + with open("tests/files/example_dblp_containers.json", "r") as f: dblp_container_importer.reset() dblp_container_importer.bezerk_mode = False dblp_container_importer.dblp_container_map_output = io.StringIO() dblp_container_importer.read_dblp_container_map_file(output_tsv_map) counts = JsonLinePusher(dblp_container_importer, f).run() print(counts) - assert counts['insert'] == 0 - assert counts['exists'] == 10 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 10 + assert counts["skip"] == 0 assert last_index == dblp_container_importer.api.get_changelog(limit=1)[0].index + def test_dblp_xml_parse(dblp_importer): - with open('tests/files/example_dblp_article.xml', 'r') as f: + with open("tests/files/example_dblp_article.xml", "r") as f: soup = BeautifulSoup(f, "xml") r1 = dblp_importer.parse_record(soup.find_all("article")[0]) assert r1.title == "Jim Gray, astronomer" assert r1.contribs[0].raw_name == "Alexander S. Szalay" # tested above, in LXML import path - #assert r1.contribs[1].raw_name == "Michael H. Bohlen" + # assert r1.contribs[1].raw_name == "Michael H. Bohlen" assert r1.contribs[2].raw_name == "Nicolas Heist" # XXX: assert r1.contribs[2].extra['orcid'] == "0000-0002-4354-9138" assert r1.contribs[3].raw_name == "Jens Lehmann" @@ -130,5 +139,5 @@ def test_dblp_xml_parse(dblp_importer): assert r1.issue == "11" assert r1.volume == "51" assert r1.release_year == 2008 - #assert r1.extra['container_name'] == "Commun. ACM" - assert r1.extra['dblp']['type'] == "article" + # assert r1.extra['container_name'] == "Commun. 
ACM" + assert r1.extra["dblp"]["type"] == "article" diff --git a/python/tests/import_doaj.py b/python/tests/import_doaj.py index 4cd3b518..9c4ba552 100644 --- a/python/tests/import_doaj.py +++ b/python/tests/import_doaj.py @@ -1,4 +1,3 @@ - import datetime import json @@ -14,7 +13,7 @@ from fatcat_tools.transforms import entity_to_dict @pytest.fixture(scope="function") def doaj_importer(api, mocker): es_client = elasticsearch.Elasticsearch("mockbackend") - mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: yield DoajArticleImporter( api, @@ -23,6 +22,7 @@ def doaj_importer(api, mocker): es_client=es_client, ) + def test_doaj_importer(doaj_importer): last_index = doaj_importer.api.get_changelog(limit=1)[0].index with open("tests/files/example_doaj_articles.json", "r") as f: @@ -60,6 +60,7 @@ def test_doaj_importer(doaj_importer): doaj_importer.api.delete_release(eg.editgroup_id, release_edit.ident) doaj_importer.api.accept_editgroup(eg.editgroup_id) + def test_doaj_importer_existing_doi(doaj_importer): """ One of the DOAJ test entities has a dummy DOI (10.123/abc); this test @@ -118,12 +119,16 @@ def test_doaj_importer_existing_doi(doaj_importer): ) doaj_importer.api.accept_editgroup(eg.editgroup_id) + def test_doaj_dict_parse(doaj_importer): with open("tests/files/example_doaj_articles.json", "r") as f: raw = json.loads(f.readline()) r = doaj_importer.parse_record(raw) - assert r.title == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel" + assert ( + r.title + == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel" + ) assert r.publisher == "Elsevier" assert r.release_type == "article-journal" assert r.release_stage == "published" @@ -140,7 +145,7 @@ def test_doaj_dict_parse(doaj_importer): assert r.version is None assert r.language == "en" # matched by ISSN, so wouldn't be defined normally - assert r.extra['container_name'] == "Materials & Design" + assert r.extra["container_name"] == "Materials & Design" assert len(r.abstracts) == 1 assert len(r.abstracts[0].content) == 1033 assert len(r.contribs) == 5 @@ -149,6 +154,6 @@ def test_doaj_dict_parse(doaj_importer): assert r.contribs[0].surname is None assert not r.refs - #print(r.extra) - assert r.extra['release_month'] == 10 - assert r.extra['country'] == 'gb' + # print(r.extra) + assert r.extra["release_month"] == 10 + assert r.extra["country"] == "gb" diff --git a/python/tests/import_file_generic.py b/python/tests/import_file_generic.py index e1d987e8..9eeeb2bb 100644 --- a/python/tests/import_file_generic.py +++ b/python/tests/import_file_generic.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client import * @@ -23,77 +22,116 @@ def test_file_update_generic(): url_sets = [ # dummy { - 'before': [], - 'after': [], + "before": [], + "after": [], }, # social => academicsocial { - 'before': [ + "before": [ FileUrl(url="https://academic.edu/blah.pdf", rel="social"), ], - 'after': [ + "after": [ FileUrl(url="https://academic.edu/blah.pdf", rel="academicsocial"), ], }, # archive.org repository => archive { - 'before': [ + "before": [ FileUrl(url="https://archive.org/download/item/blah.pdf", rel="repository"), ], - 'after': [ + "after": [ FileUrl(url="https://archive.org/download/item/blah.pdf", rel="archive"), ], }, # :80 in URL is redundant { - 'before': [ - 
FileUrl(url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"), - FileUrl(url="http://homepages.math.uic.edu:80/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"), + "before": [ + FileUrl( + url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", + rel="web", + ), + FileUrl( + url="http://homepages.math.uic.edu:80/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", + rel="web", + ), FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"), FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"), ], - 'after': [ - FileUrl(url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"), + "after": [ + FileUrl( + url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", + rel="web", + ), FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"), ], }, { - 'before': [ + "before": [ FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"), ], - 'after': [ + "after": [ FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"), ], }, # http/https redundant { - 'before': [ - FileUrl(url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"), - FileUrl(url="http://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"), + "before": [ + FileUrl( + url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", + rel="web", + ), + FileUrl( + url="http://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", + rel="web", + ), FileUrl(url="https://mit.edu/item/blah.pdf", rel="web"), - FileUrl(url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", rel="webarchive"), + FileUrl( + url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", + rel="webarchive", + ), FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"), - FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"), + FileUrl( + url="https://web.archive.org/web/12345542/something.com/blah.pdf", + rel="webarchive", + ), ], - 'after': [ - FileUrl(url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"), + "after": [ + FileUrl( + url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", + rel="web", + ), FileUrl(url="https://mit.edu/item/blah.pdf", rel="web"), - FileUrl(url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", rel="webarchive"), - FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"), + FileUrl( + url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", + rel="webarchive", + ), + FileUrl( + url="https://web.archive.org/web/12345542/something.com/blah.pdf", + rel="webarchive", + ), ], }, # short /2017/ wayback datetime { - 'before': [ - FileUrl(url="https://web.archive.org/web/2017/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"), - FileUrl(url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"), + "before": [ + FileUrl( + url="https://web.archive.org/web/2017/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", + rel="webarchive", + ), + FileUrl( + url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", + 
rel="webarchive", + ), ], - 'after': [ - FileUrl(url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"), + "after": [ + FileUrl( + url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", + rel="webarchive", + ), ], }, ] for pair in url_sets: - f1.urls = pair['before'] - assert EntityImporter.generic_file_cleanups(f1).urls == pair['after'] + f1.urls = pair["before"] + assert EntityImporter.generic_file_cleanups(f1).urls == pair["after"] diff --git a/python/tests/import_file_meta.py b/python/tests/import_file_meta.py index 949a1075..2555f351 100644 --- a/python/tests/import_file_meta.py +++ b/python/tests/import_file_meta.py @@ -1,4 +1,3 @@ - import json import pytest @@ -12,35 +11,42 @@ from fatcat_tools.importers import FileMetaImporter, JsonLinePusher def file_meta_importer(api): yield FileMetaImporter(api) + def test_file_meta_importer_basic(file_meta_importer): # insert two file entities api = file_meta_importer.api eg = quick_eg(file_meta_importer.api) # with full metadata - f1edit = api.create_file(eg.editgroup_id, FileEntity( - size=372121, - md5="e1fd97475c8aa102568f5d70a1bd0c07", - sha1="0000045687dad717ed6512e395b04ec9c00995b7", - sha256="51bdc9e40cc175089fcb60b0b188e6cbcdcddb1ff8acbe6b039b8f8fff0afff0", - mimetype="application/pdf", - )) + f1edit = api.create_file( + eg.editgroup_id, + FileEntity( + size=372121, + md5="e1fd97475c8aa102568f5d70a1bd0c07", + sha1="0000045687dad717ed6512e395b04ec9c00995b7", + sha256="51bdc9e40cc175089fcb60b0b188e6cbcdcddb1ff8acbe6b039b8f8fff0afff0", + mimetype="application/pdf", + ), + ) # partial/stub metadata - f2edit = api.create_file(eg.editgroup_id, FileEntity( - sha1="00000376ad49f56145721503f1eb5e6e49e779fd", - mimetype="application/pdf", - )) + f2edit = api.create_file( + eg.editgroup_id, + FileEntity( + sha1="00000376ad49f56145721503f1eb5e6e49e779fd", + mimetype="application/pdf", + ), + ) api.accept_editgroup(eg.editgroup_id) - with open('tests/files/example_file_meta.json', 'r') as f: + with open("tests/files/example_file_meta.json", "r") as f: counts = JsonLinePusher(file_meta_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 0 - assert counts['update'] == 1 - assert counts['skip-no-match'] == 4 - assert counts['skip-missing-field'] == 1 - assert counts['skip-existing-complete'] == 1 + assert counts["insert"] == 0 + assert counts["exists"] == 0 + assert counts["update"] == 1 + assert counts["skip-no-match"] == 4 + assert counts["skip-missing-field"] == 1 + assert counts["skip-existing-complete"] == 1 # cleanup file entities eg = quick_eg(file_meta_importer.api) @@ -48,8 +54,9 @@ def test_file_meta_importer_basic(file_meta_importer): api.delete_file(eg.editgroup_id, f2edit.ident) api.accept_editgroup(eg.editgroup_id) + def test_file_meta_dict_parse(file_meta_importer): - with open('tests/files/example_file_meta.json', 'r') as f: + with open("tests/files/example_file_meta.json", "r") as f: raw = json.loads(f.readline()) f = file_meta_importer.parse_record(raw) diff --git a/python/tests/import_fileset_generic.py b/python/tests/import_fileset_generic.py index 8d30a83b..eadf5c43 100644 --- a/python/tests/import_fileset_generic.py +++ b/python/tests/import_fileset_generic.py @@ -1,4 +1,3 @@ - import json import pytest @@ -11,48 +10,54 @@ from fatcat_tools.importers import FilesetImporter, JsonLinePusher def fileset_importer(api): yield FilesetImporter(api) + # TODO: 
use API to check that entities actually created... def test_fileset_importer_basic(fileset_importer): - with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f: + with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f: JsonLinePusher(fileset_importer, f).run() + def test_fileset_importer(fileset_importer): last_index = fileset_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f: + with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f: fileset_importer.bezerk_mode = True counts = JsonLinePusher(fileset_importer, f).run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = fileset_importer.api.get_changelog_entry(index=last_index+1) + change = fileset_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "generic fileset" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.FilesetImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.FilesetImporter" in eg.extra["agent"] # re-insert; should skip - with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f: + with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f: fileset_importer.reset() fileset_importer.bezerk_mode = False counts = JsonLinePusher(fileset_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 + def test_fileset_dict_parse(fileset_importer): - with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f: + with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f: raw = json.loads(f.readline()) fs = fileset_importer.parse_record(raw) assert fs.manifest[0].sha1 == "cc9bd558ca79b30b2966714da7ef4129537fde0c" assert fs.manifest[0].md5 == "742c40404c9a4dbbd77c0985201c639f" - assert fs.manifest[0].sha256 == "3a7c07ad17ce3638d5a1dd21f995a496e430b952eef00270ad741d506984370f" + assert ( + fs.manifest[0].sha256 + == "3a7c07ad17ce3638d5a1dd21f995a496e430b952eef00270ad741d506984370f" + ) assert fs.manifest[0].size == 640500 assert fs.manifest[0].path == "070111_LatA_100nM.txt" - assert fs.manifest[0].extra['mimetype'] == "text/plain" + assert fs.manifest[0].extra["mimetype"] == "text/plain" assert len(fs.urls) == 3 for u in fs.urls: if u.rel == "repo": diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 862b310f..a06e7aa4 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -1,4 +1,3 @@ - import base64 import json import os @@ -13,17 +12,21 @@ WARNING: these tests are currently very fragile because they have database side-effects. Should probably be disabled or re-written. 
""" + @pytest.fixture(scope="function") def grobid_metadata_importer(api): yield GrobidMetadataImporter(api) def test_grobid_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - raw = json.loads(f.readline().split('\t')[4]) + with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f: + raw = json.loads(f.readline().split("\t")[4]) re = grobid_metadata_importer.parse_grobid_json(raw) assert re - assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA" + assert ( + re.title + == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA" + ) assert len(re.contribs) == 5 print(re.contribs) assert re.contribs[0].raw_name == "Wahyu Ary" @@ -31,19 +34,21 @@ def test_grobid_metadata_parse(grobid_metadata_importer): assert re.contribs[0].surname == "Ary" assert re.publisher is None if re.extra: - assert re.extra.get('container_name') is None + assert re.extra.get("container_name") is None assert len(re.refs) == 27 + def test_file_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f: f.readline() - raw = f.readline().split('\t') + raw = f.readline().split("\t") # randomize sha1 so tests are repeatable - random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper()) + random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode("utf-8").upper()) fe = grobid_metadata_importer.parse_file_metadata( - random_sha1, json.loads(raw[1]), raw[2], int(raw[3])) + random_sha1, json.loads(raw[1]), raw[2], int(raw[3]) + ) assert fe - #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V" + # assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V" assert fe.md5 is None assert fe.mimetype == "application/pdf" assert fe.size == 142710 @@ -53,29 +58,30 @@ def test_file_metadata_parse(grobid_metadata_importer): assert fe.urls[0].rel == "webarchive" assert len(fe.release_ids) == 0 + def test_grobid_metadata_importer(grobid_metadata_importer): last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f: grobid_metadata_importer.bezerk_mode = True counts = LinePusher(grobid_metadata_importer, f).run() - assert counts['insert'] == 10 - assert counts['inserted.release'] == 10 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 10 + assert counts["inserted.release"] == 10 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1) + change = grobid_metadata_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "grobid" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.GrobidMetadataImporter" in eg.extra["agent"] - with open('tests/files/example_grobid_metadata_lines.tsv', 
'r') as f: + with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f: grobid_metadata_importer.reset() grobid_metadata_importer.bezerk_mode = False counts = LinePusher(grobid_metadata_importer, f).run() - assert counts['insert'] == 0 - assert counts['inserted.release'] == 0 - assert counts['exists'] == 10 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["inserted.release"] == 0 + assert counts["exists"] == 10 + assert counts["skip"] == 0 diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 04a8e7f6..d9e7d294 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -1,4 +1,3 @@ - import json import pytest @@ -15,94 +14,100 @@ from fatcat_tools.importers import ( def ingest_importer(api): yield IngestFileResultImporter(api) + @pytest.fixture(scope="function") def ingest_web_importer(api): yield IngestWebResultImporter(api) + # TODO: use API to check that entities actually created... def test_ingest_importer_basic(ingest_importer): - with open('tests/files/example_ingest.json', 'r') as f: + with open("tests/files/example_ingest.json", "r") as f: JsonLinePusher(ingest_importer, f).run() + def test_ingest_importer(ingest_importer): last_index = ingest_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_ingest.json', 'r') as f: + with open("tests/files/example_ingest.json", "r") as f: ingest_importer.bezerk_mode = True counts = JsonLinePusher(ingest_importer, f).run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 1 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 1 # fetch most recent editgroup - change = ingest_importer.api.get_changelog_entry(index=last_index+1) + change = ingest_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "crawled from web" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.IngestFileResultImporter" in eg.extra["agent"] # re-insert; should skip - with open('tests/files/example_ingest.json', 'r') as f: + with open("tests/files/example_ingest.json", "r") as f: ingest_importer.reset() ingest_importer.bezerk_mode = False counts = JsonLinePusher(ingest_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 1 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 1 + def test_ingest_importer_xml(ingest_importer): last_index = ingest_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_ingest_xml.json', 'r') as f: + with open("tests/files/example_ingest_xml.json", "r") as f: ingest_importer.bezerk_mode = True counts = JsonLinePusher(ingest_importer, f).run() print(counts) - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = ingest_importer.api.get_changelog_entry(index=last_index+1) + change = ingest_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "crawled from web" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.IngestFileResultImporter" 
in eg.extra["agent"] # re-import should skip - with open('tests/files/example_ingest_xml.json', 'r') as f: + with open("tests/files/example_ingest_xml.json", "r") as f: ingest_importer.reset() ingest_importer.bezerk_mode = False counts = JsonLinePusher(ingest_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 + def test_ingest_importer_web(ingest_web_importer): last_index = ingest_web_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_ingest_html.json', 'r') as f: + with open("tests/files/example_ingest_html.json", "r") as f: ingest_web_importer.bezerk_mode = True counts = JsonLinePusher(ingest_web_importer, f).run() print(counts) - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = ingest_web_importer.api.get_changelog_entry(index=last_index+1) + change = ingest_web_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "crawled from web" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.IngestWebResultImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.IngestWebResultImporter" in eg.extra["agent"] # re-import should skip - with open('tests/files/example_ingest_html.json', 'r') as f: + with open("tests/files/example_ingest_html.json", "r") as f: ingest_web_importer.reset() ingest_web_importer.bezerk_mode = False counts = JsonLinePusher(ingest_web_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 + def test_ingest_importer_stage(ingest_importer, api): """ @@ -117,29 +122,30 @@ def test_ingest_importer_stage(ingest_importer, api): dict(request_stage="published", release_stage="draft", status="skip-release-stage"), ] ingest_importer.bezerk_mode = True - with open('tests/files/example_ingest.json', 'r') as f: + with open("tests/files/example_ingest.json", "r") as f: raw = json.loads(f.readline()) for row in test_table: - #print(row) + # print(row) # set dummy record stage eg = quick_eg(api) r1 = api.lookup_release(doi="10.123/abc") - r1.release_stage = row['release_stage'] + r1.release_stage = row["release_stage"] api.update_release(eg.editgroup_id, r1.ident, r1) api.accept_editgroup(eg.editgroup_id) # set ingest request stage - raw['request']['release_stage'] = row['request_stage'] + raw["request"]["release_stage"] = row["request_stage"] ingest_importer.reset() ingest_importer.push_record(raw) counts = ingest_importer.finish() print(counts) assert counts["total"] == 1 - assert counts[row['status']] == 1 + assert counts[row["status"]] == 1 + def test_ingest_dict_parse(ingest_importer): - with open('tests/files/example_ingest.json', 'r') as f: + with open("tests/files/example_ingest.json", "r") as f: raw = json.loads(f.readline()) f = ingest_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" @@ -154,14 +160,15 @@ def test_ingest_dict_parse(ingest_importer): assert u.url.startswith("https://web.archive.org/") assert len(f.release_ids) == 1 + def test_ingest_dict_parse_old(ingest_importer): - with open('tests/files/example_ingest.old.json', 'r') as f: + with 
open("tests/files/example_ingest.old.json", "r") as f: raw = json.loads(f.readline()) # ancient ingest requests had no type; skip them f = ingest_importer.parse_record(raw) assert f is None - raw['request']['ingest_type'] = 'pdf' + raw["request"]["ingest_type"] = "pdf" f = ingest_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py index c11e2307..4ebc87b4 100644 --- a/python/tests/import_jalc.py +++ b/python/tests/import_jalc.py @@ -1,4 +1,3 @@ - import pytest from bs4 import BeautifulSoup from fixtures import * @@ -8,70 +7,78 @@ from fatcat_tools.importers import Bs4XmlFilePusher, Bs4XmlLinesPusher, JalcImpo @pytest.fixture(scope="function") def jalc_importer(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield JalcImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield JalcImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True + ) + @pytest.fixture(scope="function") def jalc_importer_existing(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield JalcImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield JalcImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False + ) + def test_jalc_importer(jalc_importer): last_index = jalc_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jalc_lod_sample.xml', 'r') as f: + with open("tests/files/jalc_lod_sample.xml", "r") as f: jalc_importer.bezerk_mode = True counts = Bs4XmlFilePusher(jalc_importer, f, "Description").run() - assert counts['insert'] == 2 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 2 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = jalc_importer.api.get_changelog_entry(index=last_index+1) + change = jalc_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "jalc" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.JalcImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.JalcImporter" in eg.extra["agent"] last_index = jalc_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jalc_lod_sample.xml', 'r') as f: + with open("tests/files/jalc_lod_sample.xml", "r") as f: jalc_importer.bezerk_mode = False jalc_importer.reset() counts = Bs4XmlFilePusher(jalc_importer, f, "Description").run() - assert counts['insert'] == 0 - assert counts['exists'] == 2 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 2 + assert counts["skip"] == 0 assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index + def test_jalc_importer_lines(jalc_importer): last_index = jalc_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f: + with open("tests/files/jalc_rdf_sample_100.xml", "r") as f: jalc_importer.bezerk_mode = True counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run() - assert counts['insert'] == 93 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 93 + assert counts["exists"] == 0 + assert 
counts["skip"] == 0 # fetch most recent editgroup - change = jalc_importer.api.get_changelog_entry(index=last_index+1) + change = jalc_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "jalc" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.JalcImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.JalcImporter" in eg.extra["agent"] last_index = jalc_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f: + with open("tests/files/jalc_rdf_sample_100.xml", "r") as f: jalc_importer.bezerk_mode = False jalc_importer.reset() counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run() - assert counts['insert'] == 0 - assert counts['exists'] == 93 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 93 + assert counts["skip"] == 0 assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index + def test_jalc_xml_parse(jalc_importer): - with open('tests/files/jalc_lod_sample.xml', 'r') as f: + with open("tests/files/jalc_lod_sample.xml", "r") as f: soup = BeautifulSoup(f, "xml") r = jalc_importer.parse_record(soup.find_all("Description")[0]) @@ -90,21 +97,21 @@ def test_jalc_xml_parse(jalc_importer): assert r.pages == "898-902" assert r.release_year == 1989 # matched by ISSN, so shouldn't be in there? - #assert extra['container_name'] == "International Journal of Quantum Chemistry" + # assert extra['container_name'] == "International Journal of Quantum Chemistry" assert len(r.contribs) == 4 assert r.contribs[0].raw_name == "Yasuhiko Hashimoto" assert r.contribs[0].given_name == "Yasuhiko" assert r.contribs[0].surname == "Hashimoto" - assert r.contribs[0].extra['original_name']['raw_name'] == "橋本 雍彦" - assert r.contribs[0].extra['original_name']['given_name'] == "雍彦" - assert r.contribs[0].extra['original_name']['surname'] == "橋本" + assert r.contribs[0].extra["original_name"]["raw_name"] == "橋本 雍彦" + assert r.contribs[0].extra["original_name"]["given_name"] == "雍彦" + assert r.contribs[0].extra["original_name"]["surname"] == "橋本" assert r.contribs[3].raw_name == "Teruo Takahashi" assert r.contribs[3].given_name == "Teruo" assert r.contribs[3].surname == "Takahashi" - assert r.contribs[3].extra['original_name']['raw_name'] == "高橋 輝男" - assert r.contribs[3].extra['original_name']['given_name'] == "輝男" - assert r.contribs[3].extra['original_name']['surname'] == "高橋" + assert r.contribs[3].extra["original_name"]["raw_name"] == "高橋 輝男" + assert r.contribs[3].extra["original_name"]["given_name"] == "輝男" + assert r.contribs[3].extra["original_name"]["surname"] == "高橋" assert not r.refs diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py index cdf4e8ed..ff9dcb7a 100644 --- a/python/tests/import_journal_metadata.py +++ b/python/tests/import_journal_metadata.py @@ -1,4 +1,3 @@ - import pytest from fixtures import * @@ -9,32 +8,34 @@ from fatcat_tools.importers import JournalMetadataImporter, JsonLinePusher def journal_metadata_importer(api): yield JournalMetadataImporter(api) + # TODO: use API to check that entities actually created... 
def test_journal_metadata_importer_batch(journal_metadata_importer): - with open('tests/files/journal_metadata.sample.json', 'r') as f: + with open("tests/files/journal_metadata.sample.json", "r") as f: JsonLinePusher(journal_metadata_importer, f).run() + def test_journal_metadata_importer(journal_metadata_importer): last_index = journal_metadata_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/journal_metadata.sample.json', 'r') as f: + with open("tests/files/journal_metadata.sample.json", "r") as f: journal_metadata_importer.bezerk_mode = True counts = JsonLinePusher(journal_metadata_importer, f).run() - assert counts['insert'] == 20 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 20 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = journal_metadata_importer.api.get_changelog_entry(index=last_index+1) + change = journal_metadata_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "container" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.JournalMetadataImporter" in eg.extra["agent"] - with open('tests/files/journal_metadata.sample.json', 'r') as f: + with open("tests/files/journal_metadata.sample.json", "r") as f: journal_metadata_importer.reset() journal_metadata_importer.bezerk_mode = False counts = JsonLinePusher(journal_metadata_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 20 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 20 + assert counts["skip"] == 0 diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py index 270da3ec..8ad550b3 100644 --- a/python/tests/import_jstor.py +++ b/python/tests/import_jstor.py @@ -1,4 +1,3 @@ - import pytest from bs4 import BeautifulSoup from fixtures import * @@ -8,48 +7,58 @@ from fatcat_tools.importers import Bs4XmlFilePusher, JstorImporter @pytest.fixture(scope="function") def jstor_importer(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield JstorImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield JstorImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True + ) + @pytest.fixture(scope="function") def jstor_importer_existing(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield JstorImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield JstorImporter( + api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False + ) + def test_jstor_importer(jstor_importer): last_index = jstor_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f: + with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f: jstor_importer.bezerk_mode = True counts = Bs4XmlFilePusher(jstor_importer, f, "article").run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = 
jstor_importer.api.get_changelog_entry(index=last_index+1) + change = jstor_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "jstor" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.JstorImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.JstorImporter" in eg.extra["agent"] last_index = jstor_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f: + with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f: jstor_importer.bezerk_mode = False jstor_importer.reset() counts = Bs4XmlFilePusher(jstor_importer, f, "article").run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 assert last_index == jstor_importer.api.get_changelog(limit=1)[0].index + def test_jstor_xml_parse(jstor_importer): - with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f: + with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f: soup = BeautifulSoup(f, "xml") r = jstor_importer.parse_record(soup.find_all("article")[0]) print(r.extra) - assert r.title == "On the Universal Law of Attraction, Including that of Gravitation, as a Particular Case of Approximation Deducible from the Principle that Equal and Similar Particles of Matter Move Similarly, Relatively to Each other. [Abstract]" + assert ( + r.title + == "On the Universal Law of Attraction, Including that of Gravitation, as a Particular Case of Approximation Deducible from the Principle that Equal and Similar Particles of Matter Move Similarly, Relatively to Each other. [Abstract]" + ) assert r.subtitle is None assert r.original_title is None assert r.publisher == "The Royal Society" @@ -66,9 +75,9 @@ def test_jstor_xml_parse(jstor_importer): assert r.release_date is None assert r.release_year == 1843 # matched by ISSN, so shouldn't be in there? - #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" + # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" assert len(r.contribs) == 1 - assert r.extra['jstor']['journal_ids'] == ['abstpapecommroya', 'j100687'] + assert r.extra["jstor"]["journal_ids"] == ["abstpapecommroya", "j100687"] assert r.contribs[0].raw_name == "John Kinnersley Smythies" assert r.contribs[0].given_name == "John Kinnersley" diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index aad8e799..4ade6e04 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -1,4 +1,3 @@ - import json import pytest @@ -11,39 +10,42 @@ from fatcat_tools.importers import JsonLinePusher, MatchedImporter def matched_importer(api): yield MatchedImporter(api) + # TODO: use API to check that entities actually created... 
def test_matched_importer_basic(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: + with open("tests/files/example_matched.json", "r") as f: JsonLinePusher(matched_importer, f).run() + def test_matched_importer(matched_importer): last_index = matched_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_matched.json', 'r') as f: + with open("tests/files/example_matched.json", "r") as f: matched_importer.bezerk_mode = True counts = JsonLinePusher(matched_importer, f).run() - assert counts['insert'] == 2 - assert counts['exists'] == 0 - assert counts['skip'] == 11 + assert counts["insert"] == 2 + assert counts["exists"] == 0 + assert counts["skip"] == 11 # fetch most recent editgroup - change = matched_importer.api.get_changelog_entry(index=last_index+1) + change = matched_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "file-to-release" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.MatchedImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.MatchedImporter" in eg.extra["agent"] # re-insert; should skip - with open('tests/files/example_matched.json', 'r') as f: + with open("tests/files/example_matched.json", "r") as f: matched_importer.reset() matched_importer.bezerk_mode = False counts = JsonLinePusher(matched_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 2 - assert counts['skip'] == 11 + assert counts["insert"] == 0 + assert counts["exists"] == 2 + assert counts["skip"] == 11 + def test_matched_dict_parse(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: + with open("tests/files/example_matched.json", "r") as f: raw = json.loads(f.readline()) f = matched_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 88a4b871..9566719b 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -1,4 +1,3 @@ - import json import pytest @@ -11,44 +10,48 @@ from fatcat_tools.importers import JsonLinePusher, OrcidImporter def orcid_importer(api): yield OrcidImporter(api) + def test_orcid_importer_badid(orcid_importer): - with open('tests/files/0000-0001-8254-710X.json', 'r') as f: + with open("tests/files/0000-0001-8254-710X.json", "r") as f: JsonLinePusher(orcid_importer, f).run() + # TODO: use API to check that entities actually created... 
def test_orcid_importer(orcid_importer): last_index = orcid_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/0000-0001-8254-7103.json', 'r') as f: + with open("tests/files/0000-0001-8254-7103.json", "r") as f: orcid_importer.bezerk_mode = True counts = JsonLinePusher(orcid_importer, f).run() - assert counts['insert'] == 1 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 1 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = orcid_importer.api.get_changelog_entry(index=last_index+1) + change = orcid_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "orcid" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.OrcidImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.OrcidImporter" in eg.extra["agent"] - with open('tests/files/0000-0001-8254-7103.json', 'r') as f: + with open("tests/files/0000-0001-8254-7103.json", "r") as f: orcid_importer.reset() orcid_importer.bezerk_mode = False counts = JsonLinePusher(orcid_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 1 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 1 + assert counts["skip"] == 0 + def test_orcid_importer_x(orcid_importer): - with open('tests/files/0000-0003-3953-765X.json', 'r') as f: + with open("tests/files/0000-0003-3953-765X.json", "r") as f: JsonLinePusher(orcid_importer, f).run() c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X") assert c is not None + def test_orcid_dict_parse(orcid_importer): - with open('tests/files/0000-0001-8254-7103.json', 'r') as f: + with open("tests/files/0000-0001-8254-7103.json", "r") as f: raw = json.loads(f.readline()) c = orcid_importer.parse_record(raw) assert c.given_name == "Man-Hui" diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py index 371c59bb..a5301f29 100644 --- a/python/tests/import_pubmed.py +++ b/python/tests/import_pubmed.py @@ -1,4 +1,3 @@ - import pytest from bs4 import BeautifulSoup from fixtures import * @@ -8,43 +7,58 @@ from fatcat_tools.importers import Bs4XmlLargeFilePusher, PubmedImporter @pytest.fixture(scope="function") def pubmed_importer(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield PubmedImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True, lookup_refs=True) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield PubmedImporter( + api, + issn_file, + extid_map_file="tests/files/example_map.sqlite3", + bezerk_mode=True, + lookup_refs=True, + ) + @pytest.fixture(scope="function") def pubmed_importer_existing(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield PubmedImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False, lookup_refs=True) + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: + yield PubmedImporter( + api, + issn_file, + extid_map_file="tests/files/example_map.sqlite3", + bezerk_mode=False, + lookup_refs=True, + ) + def test_pubmed_importer(pubmed_importer): last_index = pubmed_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/pubmedsample_2019.xml', 'r') as f: + with open("tests/files/pubmedsample_2019.xml", "r") as f: pubmed_importer.bezerk_mode = True counts = Bs4XmlLargeFilePusher(pubmed_importer, f, ["PubmedArticle"]).run() 
- assert counts['insert'] == 176 - assert counts['exists'] == 0 - assert counts['skip'] == 0 + assert counts["insert"] == 176 + assert counts["exists"] == 0 + assert counts["skip"] == 0 # fetch most recent editgroup - change = pubmed_importer.api.get_changelog_entry(index=last_index+1) + change = pubmed_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "pubmed" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.PubmedImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.PubmedImporter" in eg.extra["agent"] last_index = pubmed_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/pubmedsample_2019.xml', 'r') as f: + with open("tests/files/pubmedsample_2019.xml", "r") as f: pubmed_importer.bezerk_mode = False pubmed_importer.reset() counts = Bs4XmlLargeFilePusher(pubmed_importer, f, ["PubmedArticle"]).run() - assert counts['insert'] == 0 - assert counts['exists'] == 176 - assert counts['skip'] == 0 + assert counts["insert"] == 0 + assert counts["exists"] == 176 + assert counts["skip"] == 0 assert last_index == pubmed_importer.api.get_changelog(limit=1)[0].index + def test_pubmed_xml_parse(pubmed_importer): - with open('tests/files/pubmedsample_2019.xml', 'r') as f: + with open("tests/files/pubmedsample_2019.xml", "r") as f: soup = BeautifulSoup(f, "xml") r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0]) r2 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[-1]) @@ -62,10 +76,10 @@ def test_pubmed_xml_parse(pubmed_importer): assert r1.volume == "3" assert r1.issue == "1" assert r1.pages == "69-81" - assert r1.release_date is None # not "1976-12-03", which is medline ingest date + assert r1.release_date is None # not "1976-12-03", which is medline ingest date assert r1.release_year == 1976 # matched by ISSN, so shouldn't be in there? - #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" + # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London" assert len(r1.contribs) == 1 assert r1.contribs[0].raw_name == "F R Blume" @@ -73,10 +87,13 @@ def test_pubmed_xml_parse(pubmed_importer): assert r1.contribs[0].surname == "Blume" print(r1.extra) - assert r1.extra['pubmed']['pub_types'] == ['Journal Article'] + assert r1.extra["pubmed"]["pub_types"] == ["Journal Article"] assert not r1.refs - assert r2.title == "Synthesis and Antibacterial Activity of Metal(loid) Nanostructures by Environmental Multi-Metal(loid) Resistant Bacteria and Metal(loid)-Reducing Flavoproteins" + assert ( + r2.title + == "Synthesis and Antibacterial Activity of Metal(loid) Nanostructures by Environmental Multi-Metal(loid) Resistant Bacteria and Metal(loid)-Reducing Flavoproteins" + ) assert r2.subtitle is None assert r2.original_title is None assert r2.publisher is None @@ -93,46 +110,56 @@ def test_pubmed_xml_parse(pubmed_importer): assert str(r2.release_date) == "2018-05-15" assert r2.release_year == 2018 # matched by ISSN, so shouldn't be in there? 
- #assert extra['container_name'] == "Frontiers in microbiology" + # assert extra['container_name'] == "Frontiers in microbiology" assert len(r2.contribs) > 3 assert r2.contribs[0].raw_name == "Maximiliano Figueroa" assert r2.contribs[0].given_name == "Maximiliano" assert r2.contribs[0].surname == "Figueroa" - assert r2.contribs[0].raw_affiliation == "Laboratorio Microbiología Molecular, Departamento de Biología, Facultad de Química y Biología, Universidad de Santiago de Chile, Santiago, Chile." + assert ( + r2.contribs[0].raw_affiliation + == "Laboratorio Microbiología Molecular, Departamento de Biología, Facultad de Química y Biología, Universidad de Santiago de Chile, Santiago, Chile." + ) assert r2.contribs[4].surname == "Muñoz-Villagrán" assert r2.contribs[7].surname == "Latorre" - assert r2.contribs[7].raw_affiliation == "Mathomics, Centro de Modelamiento Matemático, Universidad de Chile, Beauchef, Santiago, Chile." - assert r2.contribs[7].extra['more_affiliations'] == [ + assert ( + r2.contribs[7].raw_affiliation + == "Mathomics, Centro de Modelamiento Matemático, Universidad de Chile, Beauchef, Santiago, Chile." + ) + assert r2.contribs[7].extra["more_affiliations"] == [ "Fondap-Center of Genome Regulation, Facultad de Ciencias, Universidad de Chile, Santiago, Chile.", "Laboratorio de Bioinformática y Expresión Génica, INTA, Universidad de Chile, Santiago, Chile.", "Instituto de Ciencias de la Ingeniería, Universidad de O'Higgins, Rancagua, Chile.", ] assert r2.contribs[-1].raw_name == "Felipe Arenas" - assert r2.abstracts[0].content.startswith("Microbes are suitable candidates to recover and decontaminate different environments from soluble metal ions, either via reduction") + assert r2.abstracts[0].content.startswith( + "Microbes are suitable candidates to recover and decontaminate different environments from soluble metal ions, either via reduction" + ) assert r2.abstracts[0].lang == "en" print(r2.extra) - assert r2.extra['pubmed']['pub_types'] == ['Journal Article'] + assert r2.extra["pubmed"]["pub_types"] == ["Journal Article"] - assert r2.refs[0].extra['unstructured'] == "Microbiology. 2009 Jun;155(Pt 6):1840-6" - assert r2.refs[0].extra['pmid'] == "19383690" + assert r2.refs[0].extra["unstructured"] == "Microbiology. 2009 Jun;155(Pt 6):1840-6" + assert r2.refs[0].extra["pmid"] == "19383690" assert len(r2.refs) > 1 + def test_pubmed_xml_dates(pubmed_importer): - with open('tests/files/pubmed_31393839.xml', 'r') as f: + with open("tests/files/pubmed_31393839.xml", "r") as f: soup = BeautifulSoup(f, "xml") r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0]) assert r1.release_year == 2019 + def test_pubmed_xml_parse_refs(pubmed_importer): """ Tests the case of multiple nested ReferenceList/Reference objects, instead of a single ReferenceList with multiple Reference """ - with open('tests/files/pubmed_19129924.xml', 'r') as f: + with open("tests/files/pubmed_19129924.xml", "r") as f: soup = BeautifulSoup(f, "xml") r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0]) diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py index 1caaf780..66661084 100644 --- a/python/tests/import_shadow.py +++ b/python/tests/import_shadow.py @@ -1,4 +1,3 @@ - import json import pytest @@ -11,39 +10,42 @@ from fatcat_tools.importers import JsonLinePusher, ShadowLibraryImporter def shadow_importer(api): yield ShadowLibraryImporter(api) + # TODO: use API to check that entities actually created... 
def test_shadow_importer_basic(shadow_importer): - with open('tests/files/example_shadow.json', 'r') as f: + with open("tests/files/example_shadow.json", "r") as f: JsonLinePusher(shadow_importer, f).run() + def test_shadow_importer(shadow_importer): last_index = shadow_importer.api.get_changelog(limit=1)[0].index - with open('tests/files/example_shadow.json', 'r') as f: + with open("tests/files/example_shadow.json", "r") as f: shadow_importer.bezerk_mode = True counts = JsonLinePusher(shadow_importer, f).run() - assert counts['insert'] == 2 - assert counts['exists'] == 0 - assert counts['skip'] == 8 + assert counts["insert"] == 2 + assert counts["exists"] == 0 + assert counts["skip"] == 8 # fetch most recent editgroup - change = shadow_importer.api.get_changelog_entry(index=last_index+1) + change = shadow_importer.api.get_changelog_entry(index=last_index + 1) eg = change.editgroup assert eg.description assert "shadow library" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.ShadowLibraryImporter" in eg.extra['agent'] + assert eg.extra["git_rev"] + assert "fatcat_tools.ShadowLibraryImporter" in eg.extra["agent"] # re-insert; should skip - with open('tests/files/example_shadow.json', 'r') as f: + with open("tests/files/example_shadow.json", "r") as f: shadow_importer.reset() shadow_importer.bezerk_mode = False counts = JsonLinePusher(shadow_importer, f).run() - assert counts['insert'] == 0 - assert counts['exists'] == 2 - assert counts['skip'] == 8 + assert counts["insert"] == 0 + assert counts["exists"] == 2 + assert counts["skip"] == 8 + def test_shadow_dict_parse(shadow_importer): - with open('tests/files/example_shadow.json', 'r') as f: + with open("tests/files/example_shadow.json", "r") as f: raw = json.loads(f.readline()) f = shadow_importer.parse_record(raw) @@ -55,7 +57,9 @@ def test_shadow_dict_parse(shadow_importer): assert len(f.urls) == 2 for u in f.urls: if u.rel == "publisher": - assert u.url.startswith("https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf") + assert u.url.startswith( + "https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf" + ) if u.rel == "webarchive": assert u.url.startswith("https://web.archive.org/") assert "20180729135948" in u.url diff --git a/python/tests/importer.py b/python/tests/importer.py index 1f0e942f..76baa83d 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,22 +1,22 @@ - from fixtures import * from fatcat_tools.importers import CrossrefImporter, OrcidImporter def test_issnl_mapping_lookup(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: fi = CrossrefImporter(api, issn_map_file=issn_file) - assert fi.issn2issnl('0000-0027') == '0002-0027' - assert fi.issn2issnl('0002-0027') == '0002-0027' - assert fi.issn2issnl('9999-0027') is None + assert fi.issn2issnl("0000-0027") == "0002-0027" + assert fi.issn2issnl("0002-0027") == "0002-0027" + assert fi.issn2issnl("9999-0027") is None + + assert fi.lookup_issnl("9999-9999") is None - assert fi.lookup_issnl('9999-9999') is None def test_identifiers(api): - with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: + with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file: ci = CrossrefImporter(api, issn_map_file=issn_file) assert ci.is_issnl("1234-5678") is True diff --git a/python/tests/subentity_state.py b/python/tests/subentity_state.py index 2843176d..fda4b582 100644 --- 
a/python/tests/subentity_state.py +++ b/python/tests/subentity_state.py @@ -1,4 +1,3 @@ - import json from fatcat_openapi_client import * @@ -19,6 +18,7 @@ Current set of such references: => work -> release """ + def test_relation_states(api, app, mocker): j1 = ContainerEntity(name="test journal") @@ -31,14 +31,14 @@ def test_relation_states(api, app, mocker): # WIP container # these are basic ES stats for the container view pages - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), (200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)), ] eg = quick_eg(api) j2 = api.get_container(api.create_container(eg.editgroup_id, j2).ident) - rv = app.get('/container/{}'.format(j2.ident)) + rv = app.get("/container/{}".format(j2.ident)) assert rv.status_code == 200 # create inter-related entities @@ -59,15 +59,15 @@ def test_relation_states(api, app, mocker): assert r1.contribs[0].creator_id == c1.ident assert r1.contribs[0].creator.display_name == "test person" assert r1.contribs[0].creator.state == "active" - rv = app.get('/release/{}'.format(r1.ident)) + rv = app.get("/release/{}".format(r1.ident)) assert rv.status_code == 200 # delete creator eg = quick_eg(api) api.delete_creator(eg.editgroup_id, c1.ident) api.accept_editgroup(eg.editgroup_id) - rv = app.get('/creator/{}'.format(c1.ident)) - assert rv.status_code == 200 # TODO: HTTP status "Gone"? + rv = app.get("/creator/{}".format(c1.ident)) + assert rv.status_code == 200 # TODO: HTTP status "Gone"? c1_deleted = api.get_creator(c1.ident) assert c1_deleted.state == "deleted" @@ -79,7 +79,7 @@ def test_relation_states(api, app, mocker): assert r1.contribs[0].creator_id == c1.ident assert r1.contribs[0].creator.display_name is None assert r1.contribs[0].creator.state == "deleted" - rv = app.get('/release/{}'.format(r1.ident)) + rv = app.get("/release/{}".format(r1.ident)) assert rv.status_code == 200 # wip container @@ -96,14 +96,16 @@ def test_relation_states(api, app, mocker): assert r1.contribs[0].creator_id == c1.ident assert r1.contribs[0].creator.display_name is None assert r1.contribs[0].creator.state == "deleted" - rv = app.get('/release/{}'.format(r1.ident)) + rv = app.get("/release/{}".format(r1.ident)) assert rv.status_code == 200 # redirect release r2 = api.get_release(r2.ident, expand="container,creators,files") assert r2.files == [] eg = quick_eg(api) - api.update_release(eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds())) + api.update_release( + eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds()) + ) f2.release_ids = [r2.ident] f2 = api.get_file(api.create_file(eg.editgroup_id, f2).ident) api.accept_editgroup(eg.editgroup_id) @@ -116,9 +118,9 @@ def test_relation_states(api, app, mocker): assert r2.contribs[0].creator_id == c1.ident assert r2.contribs[0].creator.display_name is None assert r2.contribs[0].creator.state == "deleted" - rv = app.get('/release/{}'.format(r2.ident)) + rv = app.get("/release/{}".format(r2.ident)) assert rv.status_code == 302 - rv = app.get('/file/{}'.format(f2.ident)) + rv = app.get("/file/{}".format(f2.ident)) assert rv.status_code == 200 # delete release @@ -130,9 +132,9 @@ def test_relation_states(api, app, mocker): assert r2.container is None assert r2.files is None assert r2.contribs is None - rv = app.get('/release/{}'.format(r2.ident)) - assert 
rv.status_code == 200 # TODO: HTTP Gone? - rv = app.get('/file/{}'.format(f2.ident)) + rv = app.get("/release/{}".format(r2.ident)) + assert rv.status_code == 200 # TODO: HTTP Gone? + rv = app.get("/file/{}".format(f2.ident)) print(rv.data) assert rv.status_code == 200 @@ -140,7 +142,7 @@ def test_relation_states(api, app, mocker): def test_app_entity_states(api, app, mocker): # these are basic ES stats for the container view pages - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), (200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)), @@ -175,30 +177,32 @@ def test_app_entity_states(api, app, mocker): api.update_container(eg.editgroup_id, j2.ident, ContainerEntity(redirect=j1.ident)) api.update_creator(eg.editgroup_id, c2.ident, CreatorEntity(redirect=c1.ident)) api.update_file(eg.editgroup_id, f2.ident, FileEntity(redirect=f1.ident)) - api.update_release(eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds())) + api.update_release( + eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds()) + ) api.update_work(eg.editgroup_id, r2.work_id, WorkEntity(redirect=r1.work_id)) api.accept_editgroup(eg.editgroup_id) # all entities - rv = app.get('/container/{}'.format(j1.ident)) + rv = app.get("/container/{}".format(j1.ident)) assert rv.status_code == 200 - rv = app.get('/container/{}'.format(j2.ident)) + rv = app.get("/container/{}".format(j2.ident)) assert rv.status_code == 302 - rv = app.get('/creator/{}'.format(c1.ident)) + rv = app.get("/creator/{}".format(c1.ident)) assert rv.status_code == 200 - rv = app.get('/creator/{}'.format(c2.ident)) + rv = app.get("/creator/{}".format(c2.ident)) assert rv.status_code == 302 - rv = app.get('/file/{}'.format(f1.ident)) + rv = app.get("/file/{}".format(f1.ident)) assert rv.status_code == 200 - rv = app.get('/file/{}'.format(f2.ident)) + rv = app.get("/file/{}".format(f2.ident)) assert rv.status_code == 302 - rv = app.get('/release/{}'.format(r1.ident)) + rv = app.get("/release/{}".format(r1.ident)) assert rv.status_code == 200 - rv = app.get('/release/{}'.format(r2.ident)) + rv = app.get("/release/{}".format(r2.ident)) assert rv.status_code == 302 - rv = app.get('/work/{}'.format(r1.work_id)) + rv = app.get("/work/{}".format(r1.work_id)) assert rv.status_code == 200 - rv = app.get('/work/{}'.format(r2.work_id)) + rv = app.get("/work/{}".format(r2.work_id)) assert rv.status_code == 302 # delete targets @@ -211,23 +215,23 @@ def test_app_entity_states(api, app, mocker): api.accept_editgroup(eg.editgroup_id) # all entities - rv = app.get('/container/{}'.format(j1.ident)) + rv = app.get("/container/{}".format(j1.ident)) assert rv.status_code == 200 - rv = app.get('/container/{}'.format(j2.ident)) + rv = app.get("/container/{}".format(j2.ident)) assert rv.status_code == 302 - rv = app.get('/creator/{}'.format(c1.ident)) + rv = app.get("/creator/{}".format(c1.ident)) assert rv.status_code == 200 - rv = app.get('/creator/{}'.format(c2.ident)) + rv = app.get("/creator/{}".format(c2.ident)) assert rv.status_code == 302 - rv = app.get('/file/{}'.format(f1.ident)) + rv = app.get("/file/{}".format(f1.ident)) assert rv.status_code == 200 - rv = app.get('/file/{}'.format(f2.ident)) + rv = app.get("/file/{}".format(f2.ident)) assert rv.status_code == 302 - rv = app.get('/release/{}'.format(r1.ident)) + rv = 
app.get("/release/{}".format(r1.ident)) assert rv.status_code == 200 - rv = app.get('/release/{}'.format(r2.ident)) + rv = app.get("/release/{}".format(r2.ident)) assert rv.status_code == 302 - rv = app.get('/work/{}'.format(r1.work_id)) + rv = app.get("/work/{}".format(r1.work_id)) assert rv.status_code == 200 - rv = app.get('/work/{}'.format(r2.work_id)) + rv = app.get("/work/{}".format(r2.work_id)) assert rv.status_code == 302 diff --git a/python/tests/tools_api.py b/python/tests/tools_api.py index 6d514210..0de43f9b 100644 --- a/python/tests/tools_api.py +++ b/python/tests/tools_api.py @@ -1,4 +1,3 @@ - import pytest from fatcat_openapi_client.rest import ApiException @@ -10,6 +9,7 @@ def test_authenticated_api(): api.get_changelog() api.auth_check() + def test_public_api(): api = public_api("http://localhost:9411/v0") api.get_changelog() diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 83266669..2bf584ba 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -1,4 +1,3 @@ - import json import pytest @@ -10,15 +9,15 @@ from fatcat_tools.transforms import citeproc_csl, entity_from_json, release_to_c def test_csl_crossref(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) csl = release_to_csl(r) - citeproc_csl(csl, 'csl-json') - citeproc_csl(csl, 'bibtex') - citeproc_csl(csl, 'harvard1') - citeproc_csl(csl, 'harvard1', html=True) + citeproc_csl(csl, "csl-json") + citeproc_csl(csl, "bibtex") + citeproc_csl(csl, "harvard1") + citeproc_csl(csl, "harvard1", html=True) # check that with no author surnames, can't run for c in r.contribs: @@ -28,26 +27,30 @@ def test_csl_crossref(crossref_importer): release_to_csl(r) with pytest.raises(ValueError): csl = release_to_csl(r) - citeproc_csl(csl, 'csl-json') + citeproc_csl(csl, "csl-json") + def test_csl_pubmed(crossref_importer): - with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f: + with open("tests/files/example_releases_pubmed19n0972.json", "r") as f: # multiple single lines for line in f: r = entity_from_json(line, ReleaseEntity) csl = release_to_csl(r) - citeproc_csl(csl, 'csl-json') - citeproc_csl(csl, 'bibtex') - citeproc_csl(csl, 'harvard1') - citeproc_csl(csl, 'harvard1', html=True) + citeproc_csl(csl, "csl-json") + citeproc_csl(csl, "bibtex") + citeproc_csl(csl, "harvard1") + citeproc_csl(csl, "harvard1", html=True) + def test_csl_pubmed_bibtex(crossref_importer): - with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f: + with open("tests/files/example_releases_pubmed19n0972.json", "r") as f: r = entity_from_json(f.readline(), ReleaseEntity) csl = release_to_csl(r) - print(citeproc_csl(csl, 'bibtex')) + print(citeproc_csl(csl, "bibtex")) # TODO: what's with the '`' in volume? 
- assert citeproc_csl(csl, 'bibtex').strip() == """ + assert ( + citeproc_csl(csl, "bibtex").strip() + == """ @article{mędrela-kuder_szymura_2018, title={Selected anti-health behaviours among women with osteoporosis}, volume={69`}, @@ -60,6 +63,10 @@ def test_csl_pubmed_bibtex(crossref_importer): year={2018} } """.strip() - assert citeproc_csl(csl, 'harvard1', html=True).strip() == """ + ) + assert ( + citeproc_csl(csl, "harvard1", html=True).strip() + == """ Mędrela-Kuder and Szymura (2018) ‘Selected anti-health behaviours among women with osteoporosis’, <i>Roczniki Panstwowego Zakladu Higieny</i>, 69`(4). doi: 10.32394/rpzh.2018.0046. """.strip() + ) diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index cee37867..082a4e99 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -24,13 +24,14 @@ from fatcat_tools.transforms import ( def test_basic_elasticsearch_convert(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) - r.state = 'active' + r.state = "active" release_to_elasticsearch(r) + def test_rich_elasticsearch_convert(): r = ReleaseEntity( title="something", @@ -42,7 +43,7 @@ def test_rich_elasticsearch_convert(): ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), ], ) - r.state = 'active' + r.state = "active" r.container = ContainerEntity( name="dummy journal", extra={ @@ -63,117 +64,132 @@ def test_rich_elasticsearch_convert(): "doaj": {"as_of": "2010-02-03"}, }, ) - r.files = [FileEntity( - mimetype="application/pdf", - urls=[ - FileUrl(rel="dweb", url="dat://a954329dlk/thingie"), - FileUrl(rel="webarchive", url="https://web.archive.org/web/20001122030405/http://example.com"), - FileUrl(rel="web", url="https://archive.org/details/blah/file.pdf"), - ], - extra={ - "shadows": {}, - }, - )] + r.files = [ + FileEntity( + mimetype="application/pdf", + urls=[ + FileUrl(rel="dweb", url="dat://a954329dlk/thingie"), + FileUrl( + rel="webarchive", + url="https://web.archive.org/web/20001122030405/http://example.com", + ), + FileUrl(rel="web", url="https://archive.org/details/blah/file.pdf"), + ], + extra={ + "shadows": {}, + }, + ) + ] es = release_to_elasticsearch(r) - assert es['release_year'] == r.release_year - assert es['file_count'] == 1 - assert es['fileset_count'] == 0 - assert es['webcapture_count'] == 0 - assert es['ref_count'] == 2 - assert es['ref_linked_count'] == 1 - - assert es['preservation'] == "bright" - assert es['is_oa'] is True - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is True - assert es['in_web'] is True - assert es['in_dweb'] is True - assert es['in_ia'] is True - assert es['in_ia_sim'] is False - assert es['in_kbart'] is True - assert es['in_jstor'] is True + assert es["release_year"] == r.release_year + assert es["file_count"] == 1 + assert es["fileset_count"] == 0 + assert es["webcapture_count"] == 0 + assert es["ref_count"] == 2 + assert es["ref_linked_count"] == 1 + + assert es["preservation"] == "bright" + assert es["is_oa"] is True + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is True + assert es["in_web"] is True + assert es["in_dweb"] is True + assert es["in_ia"] is True + assert es["in_ia_sim"] is False + assert es["in_kbart"] is True + assert es["in_jstor"] is True + def test_elasticsearch_release_from_json(): - r = 
entity_from_json(open('./tests/files/release_etodop5banbndg3faecnfm6ozi.json', 'r').read(), ReleaseEntity) + r = entity_from_json( + open("./tests/files/release_etodop5banbndg3faecnfm6ozi.json", "r").read(), ReleaseEntity + ) es = release_to_elasticsearch(r) - assert es['subtitle'] == "Correpondence" - assert es['ident'] == "etodop5banbndg3faecnfm6ozi" - assert es['container_name'] == "BJOG: an International Journal of Obstetrics and Gynaecology" - assert es['first_page'] == "1404" - assert es['issue'] == "11" - assert es['volume'] == "118" - assert es['number'] is None - - assert es['preservation'] == "dark" - assert es['is_oa'] is False - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is True - assert es['in_web'] is False - assert es['in_dweb'] is False - assert es['in_ia'] is False - assert es['in_ia_sim'] is True - assert es['in_kbart'] is True - assert es['in_jstor'] is False + assert es["subtitle"] == "Correpondence" + assert es["ident"] == "etodop5banbndg3faecnfm6ozi" + assert ( + es["container_name"] == "BJOG: an International Journal of Obstetrics and Gynaecology" + ) + assert es["first_page"] == "1404" + assert es["issue"] == "11" + assert es["volume"] == "118" + assert es["number"] is None + + assert es["preservation"] == "dark" + assert es["is_oa"] is False + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is True + assert es["in_web"] is False + assert es["in_dweb"] is False + assert es["in_ia"] is False + assert es["in_ia_sim"] is True + assert es["in_kbart"] is True + assert es["in_jstor"] is False # this release has a fileset, and no file - r = entity_from_json(open('./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json', 'r').read(), ReleaseEntity) + r = entity_from_json( + open("./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json", "r").read(), ReleaseEntity + ) es = release_to_elasticsearch(r) - assert es['title'] == "Jakobshavn Glacier Bed Elevation" - assert es['ident'] == "3mssw2qnlnblbk7oqyv2dafgey" - assert es['file_count'] == 0 - assert es['fileset_count'] == 1 - assert es['webcapture_count'] == 0 - - assert es['preservation'] == "dark" - assert es['is_oa'] is True - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is True - assert es['in_web'] is True - assert es['in_dweb'] is True - assert es['in_ia'] is False - assert es['in_ia_sim'] is False - assert es['in_kbart'] is False - assert es['in_jstor'] is False + assert es["title"] == "Jakobshavn Glacier Bed Elevation" + assert es["ident"] == "3mssw2qnlnblbk7oqyv2dafgey" + assert es["file_count"] == 0 + assert es["fileset_count"] == 1 + assert es["webcapture_count"] == 0 + + assert es["preservation"] == "dark" + assert es["is_oa"] is True + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is True + assert es["in_web"] is True + assert es["in_dweb"] is True + assert es["in_ia"] is False + assert es["in_ia_sim"] is False + assert es["in_kbart"] is False + assert es["in_jstor"] is False # this release has a web capture, and no file (edited the JSON to remove file) - r = entity_from_json(open('./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json', 'r').read(), ReleaseEntity) + r = entity_from_json( + open("./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json", "r").read(), ReleaseEntity + ) es = release_to_elasticsearch(r) - assert es['title'] == "Rethinking Personal Digital Archiving, Part 1" - assert es['ident'] == "mjtqtuyhwfdr7j2c3l36uor7uy" - assert es['file_count'] == 0 - assert es['fileset_count'] == 0 - assert es['webcapture_count'] == 1 - - 
assert es['preservation'] == "bright" - assert es['is_oa'] is True - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is True - assert es['in_web'] is True - assert es['in_dweb'] is False - assert es['in_ia'] is True - assert es['in_ia_sim'] is False - assert es['in_kbart'] is False - assert es['in_jstor'] is False + assert es["title"] == "Rethinking Personal Digital Archiving, Part 1" + assert es["ident"] == "mjtqtuyhwfdr7j2c3l36uor7uy" + assert es["file_count"] == 0 + assert es["fileset_count"] == 0 + assert es["webcapture_count"] == 1 + + assert es["preservation"] == "bright" + assert es["is_oa"] is True + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is True + assert es["in_web"] is True + assert es["in_dweb"] is False + assert es["in_ia"] is True + assert es["in_ia_sim"] is False + assert es["in_kbart"] is False + assert es["in_jstor"] is False + def test_elasticsearch_container_transform(journal_metadata_importer): - with open('tests/files/journal_metadata.sample.json', 'r') as f: + with open("tests/files/journal_metadata.sample.json", "r") as f: raw1 = json.loads(f.readline()) raw2 = json.loads(f.readline()) c1 = journal_metadata_importer.parse_record(raw1) - c1.state = 'active' + c1.state = "active" c2 = journal_metadata_importer.parse_record(raw2) - c2.state = 'active' + c2.state = "active" - c1.extra['publisher_type'] = "big5" - c1.extra['discipline'] = "history" + c1.extra["publisher_type"] = "big5" + c1.extra["discipline"] = "history" es = container_to_elasticsearch(c1) - assert es['publisher'] == c1.publisher - assert es['discipline'] == c1.extra['discipline'] - assert es['publisher_type'] == c1.extra['publisher_type'] - assert es['keepers'] == [] + assert es["publisher"] == c1.publisher + assert es["discipline"] == c1.extra["discipline"] + assert es["publisher_type"] == c1.extra["publisher_type"] + assert es["keepers"] == [] stats = { "ident": "en4qj5ijrbf5djxx7p5zzpjyoq", @@ -186,71 +202,70 @@ def test_elasticsearch_container_transform(journal_metadata_importer): "dark": 1635, "none": 0, "shadows_only": 0, - "total": 11136 + "total": 11136, }, - "release_type": { - "_unknown": 9, - "article-journal": 11124, - "editorial": 2, - "letter": 1 - }, - "total": 11136 + "release_type": {"_unknown": 9, "article-journal": 11124, "editorial": 2, "letter": 1}, + "total": 11136, } es = container_to_elasticsearch(c2, stats=stats) - assert es['name'] == c2.name - assert es['publisher'] == c2.publisher - assert es['keepers'] == list(c2.extra['kbart'].keys()) == ["portico"] - assert es['any_kbart'] is True + assert es["name"] == c2.name + assert es["publisher"] == c2.publisher + assert es["keepers"] == list(c2.extra["kbart"].keys()) == ["portico"] + assert es["any_kbart"] is True def test_elasticsearch_file_transform(): - with open('./tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json', 'r') as f: + with open("./tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json", "r") as f: json_str = f.read() fe = entity_from_json(json_str, FileEntity) - fe.state = 'active' + fe.state = "active" es = file_to_elasticsearch(fe) # pylint infers type of 'fe' incorrectly for some reason (as str/bytes) - assert es['sha1'] == fe.sha1 # pylint: disable=no-member - assert es['sha256'] == fe.sha256 # pylint: disable=no-member - assert es['md5'] == fe.md5 # pylint: disable=no-member - assert es['size_bytes'] == fe.size # pylint: disable=no-member - assert es['mimetype'] == fe.mimetype # pylint: disable=no-member - assert es['in_ia'] is True - - assert 'web' in es['rels'] - assert 
'www.zhros.ru' in es['hosts'] - assert 'zhros.ru' in es['domains'] - assert 'archive.org' in (es['hosts'] + es['domains']) - assert 'web.archive.org' in (es['hosts'] + es['domains']) + assert es["sha1"] == fe.sha1 # pylint: disable=no-member + assert es["sha256"] == fe.sha256 # pylint: disable=no-member + assert es["md5"] == fe.md5 # pylint: disable=no-member + assert es["size_bytes"] == fe.size # pylint: disable=no-member + assert es["mimetype"] == fe.mimetype # pylint: disable=no-member + assert es["in_ia"] is True + + assert "web" in es["rels"] + assert "www.zhros.ru" in es["hosts"] + assert "zhros.ru" in es["domains"] + assert "archive.org" in (es["hosts"] + es["domains"]) + assert "web.archive.org" in (es["hosts"] + es["domains"]) # old regression - assert '.archive.org' not in (es['hosts'] + es['domains']) + assert ".archive.org" not in (es["hosts"] + es["domains"]) + def test_elasticsearch_changelog_transform(): - ce = entity_from_json(open('./tests/files/changelog_3469683.json', 'r').read(), ChangelogEntry) + ce = entity_from_json( + open("./tests/files/changelog_3469683.json", "r").read(), ChangelogEntry + ) es = changelog_to_elasticsearch(ce) - assert es['index'] == 3469683 + assert es["index"] == 3469683 # len("2020-01-30T05:04:39") => 19 - assert es['timestamp'][:19] == "2020-01-30T05:04:39.738601Z"[:19] - assert es['editor_id'] == "scmbogxw25evtcesfcab5qaboa" - assert es['username'] == "crawl-bot" - assert es['is_bot'] is True - assert es['is_admin'] is True - assert es['agent'] == "fatcat_tools.IngestFileResultImporter" - - assert es['total'] == 50 - assert es['files'] == 50 - assert es['new_files'] == 50 - assert es['created'] == 50 - - assert es['releases'] == 0 - assert es['new_releases'] == 0 - assert es['updated'] == 0 - assert es['deleted'] == 0 + assert es["timestamp"][:19] == "2020-01-30T05:04:39.738601Z"[:19] + assert es["editor_id"] == "scmbogxw25evtcesfcab5qaboa" + assert es["username"] == "crawl-bot" + assert es["is_bot"] is True + assert es["is_admin"] is True + assert es["agent"] == "fatcat_tools.IngestFileResultImporter" + + assert es["total"] == 50 + assert es["files"] == 50 + assert es["new_files"] == 50 + assert es["created"] == 50 + + assert es["releases"] == 0 + assert es["new_releases"] == 0 + assert es["updated"] == 0 + assert es["deleted"] == 0 + def test_elasticsearch_release_kbart_year(): this_year = datetime.date.today().year @@ -264,7 +279,7 @@ def test_elasticsearch_release_kbart_year(): ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), ], ) - r.state = 'active' + r.state = "active" r.container = ContainerEntity( name="dummy journal", extra={ @@ -276,18 +291,18 @@ def test_elasticsearch_release_kbart_year(): }, ) es = release_to_elasticsearch(r) - assert es['release_year'] == this_year - - assert es['preservation'] == "none" - assert es['is_oa'] is True - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is False - assert es['in_web'] is False - assert es['in_dweb'] is False - assert es['in_ia'] is False - assert es['in_ia_sim'] is False - assert es['in_kbart'] is False - assert es['in_jstor'] is False + assert es["release_year"] == this_year + + assert es["preservation"] == "none" + assert es["is_oa"] is True + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is False + assert es["in_web"] is False + assert es["in_dweb"] is False + assert es["in_ia"] is False + assert es["in_ia_sim"] is False + assert es["in_kbart"] is False + assert es["in_jstor"] is False r.container = ContainerEntity( name="dummy 
journal", @@ -300,15 +315,15 @@ def test_elasticsearch_release_kbart_year(): }, ) es = release_to_elasticsearch(r) - assert es['release_year'] == this_year - - assert es['preservation'] == "dark" - assert es['is_oa'] is True - assert es['is_longtail_oa'] is False - assert es['is_preserved'] is True - assert es['in_web'] is False - assert es['in_dweb'] is False - assert es['in_ia'] is False - assert es['in_ia_sim'] is False - assert es['in_kbart'] is True - assert es['in_jstor'] is False + assert es["release_year"] == this_year + + assert es["preservation"] == "dark" + assert es["is_oa"] is True + assert es["is_longtail_oa"] is False + assert es["is_preserved"] is True + assert es["in_web"] is False + assert es["in_dweb"] is False + assert es["in_ia"] is False + assert es["in_ia_sim"] is False + assert es["in_kbart"] is True + assert es["in_jstor"] is False diff --git a/python/tests/transform_ingest.py b/python/tests/transform_ingest.py index 6a358c8e..4949c109 100644 --- a/python/tests/transform_ingest.py +++ b/python/tests/transform_ingest.py @@ -1,4 +1,3 @@ - import json from fatcat_openapi_client import * @@ -9,14 +8,15 @@ from fatcat_tools.transforms import release_ingest_request def test_basic_ingest_release(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) - r.state = 'active' + r.state = "active" req = release_ingest_request(r) assert req is not None + def test_rich_ingest_release(): r = ReleaseEntity( title="something", @@ -29,7 +29,7 @@ def test_rich_ingest_release(): ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"), ], ) - r.state = 'active' + r.state = "active" r.container = ContainerEntity( name="dummy journal", extra={ @@ -52,12 +52,12 @@ def test_rich_ingest_release(): ) ir = release_ingest_request(r) assert ir is not None - assert ir['base_url'] == 'https://doi.org/10.123/456' - assert ir['ext_ids']['doi'] == '10.123/456' - assert ir['ext_ids'].get('pmcid') is None - assert ir['ingest_type'] == 'pdf' + assert ir["base_url"] == "https://doi.org/10.123/456" + assert ir["ext_ids"]["doi"] == "10.123/456" + assert ir["ext_ids"].get("pmcid") is None + assert ir["ingest_type"] == "pdf" # check ingest type ("d-lib") r.container_id = "ugbiirfvufgcjkx33r3cmemcuu" ir = release_ingest_request(r) - assert ir['ingest_type'] == 'html' + assert ir["ingest_type"] == "html" diff --git a/python/tests/transform_toml.py b/python/tests/transform_toml.py index 29da1d38..56bf32a1 100644 --- a/python/tests/transform_toml.py +++ b/python/tests/transform_toml.py @@ -1,4 +1,3 @@ - import json from fatcat_openapi_client import ReleaseEntity @@ -9,15 +8,15 @@ from fatcat_tools.transforms import entity_from_toml, entity_to_toml def test_basic_toml(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: + with open("tests/files/crossref-works.single.json", "r") as f: # not a single line raw = json.loads(f.read()) r = crossref_importer.parse_record(raw) - r.state = 'active' + r.state = "active" toml_str = entity_to_toml(r) r2 = entity_from_toml(toml_str, ReleaseEntity) assert r == r2 - toml_str = entity_to_toml(r, pop_fields=['ident', 'revision', 'blah', 'extra']) + toml_str = entity_to_toml(r, pop_fields=["ident", "revision", "blah", "extra"]) r3 = entity_from_toml(toml_str, ReleaseEntity) assert r != r3 diff --git a/python/tests/web_auth.py b/python/tests/web_auth.py index 
643d806e..1238275e 100644 --- a/python/tests/web_auth.py +++ b/python/tests/web_auth.py @@ -1,4 +1,3 @@ - import responses from fixtures import * @@ -9,53 +8,74 @@ def test_ia_xauth_fail(full_app): # failed login with full_app.test_client() as app: - rv = app.get('/auth/ia/login') + rv = app.get("/auth/ia/login") assert rv.status_code == 200 - responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=authenticate", - status=401, json=dict(success=False)) - rv = app.post('/auth/ia/login', follow_redirects=True, - data=dict(email="abcd@example.com", password="god")) + responses.add( + responses.POST, + full_app.config["IA_XAUTH_URI"] + "?op=authenticate", + status=401, + json=dict(success=False), + ) + rv = app.post( + "/auth/ia/login", + follow_redirects=True, + data=dict(email="abcd@example.com", password="god"), + ) assert rv.status_code == 401 - rv = app.get('/auth/account', follow_redirects=False) + rv = app.get("/auth/account", follow_redirects=False) assert rv.status_code == 302 + @responses.activate def test_ia_xauth(full_app): # successful login with full_app.test_client() as app: - rv = app.get('/auth/token_login') + rv = app.get("/auth/token_login") assert rv.status_code == 200 - responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=authenticate", - status=200, json={'success': True}) - responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=info", - status=200, json={ - 'success': True, - 'values': {'screenname': "user123", - 'itemname': "user_item123"}}) - rv = app.post('/auth/ia/login', follow_redirects=True, - data=dict(email="abcd@example.com", password="god")) + responses.add( + responses.POST, + full_app.config["IA_XAUTH_URI"] + "?op=authenticate", + status=200, + json={"success": True}, + ) + responses.add( + responses.POST, + full_app.config["IA_XAUTH_URI"] + "?op=info", + status=200, + json={ + "success": True, + "values": {"screenname": "user123", "itemname": "user_item123"}, + }, + ) + rv = app.post( + "/auth/ia/login", + follow_redirects=True, + data=dict(email="abcd@example.com", password="god"), + ) assert rv.status_code == 200 - rv = app.get('/auth/account', follow_redirects=False) + rv = app.get("/auth/account", follow_redirects=False) assert rv.status_code == 200 + def test_basic_auth_views(app): - rv = app.get('/auth/login') + rv = app.get("/auth/login") assert rv.status_code == 200 - rv = app.get('/auth/logout') + rv = app.get("/auth/logout") assert rv.status_code == 200 + def test_auth_token(app_admin): - rv = app_admin.get('/auth/account', follow_redirects=False) + rv = app_admin.get("/auth/account", follow_redirects=False) assert rv.status_code == 200 - rv = app_admin.post('/auth/create_token', follow_redirects=False) + rv = app_admin.post("/auth/create_token", follow_redirects=False) assert rv.status_code == 200 diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py index 508600e7..e3dbec2b 100644 --- a/python/tests/web_citation_csl.py +++ b/python/tests/web_citation_csl.py @@ -1,4 +1,3 @@ - import json import pytest @@ -8,33 +7,39 @@ from fixtures import * def test_release_bibtex(app, api): # "realistic" demo entity - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam") assert rv.status_code == 200 - assert b'BibTeX' in rv.data - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam.bib') + assert b"BibTeX" in rv.data + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam.bib") assert rv.status_code == 200 - assert b'@article{' in rv.data - rv = 
app.get('/release/ccccccccccccccccccccccccca.bib') + assert b"@article{" in rv.data + rv = app.get("/release/ccccccccccccccccccccccccca.bib") assert rv.status_code == 404 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex") assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json") assert rv.status_code == 200 # could also rv.get_json() here - json.loads(rv.data.decode('utf-8')) - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=modern-language-association') + json.loads(rv.data.decode("utf-8")) + rv = app.get( + "/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=modern-language-association" + ) assert rv.status_code == 200 - assert rv.data.decode('utf-8').startswith('Ioannidis, J.. Why Most Published Research Findings Are False') + assert rv.data.decode("utf-8").startswith( + "Ioannidis, J.. Why Most Published Research Findings Are False" + ) # "dummy" demo entity; very minimal metadata - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai") assert rv.status_code == 200 - assert b'BibTeX' in rv.data - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib') + assert b"BibTeX" in rv.data + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai.bib") assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association') + rv = app.get( + "/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association" + ) assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json") assert rv.status_code == 200 # create release which can not have citeproc run on it (no authors) @@ -46,28 +51,26 @@ def test_release_bibtex(app, api): r1edit = api.create_release(eg.editgroup_id, r1) api.accept_editgroup(eg.editgroup_id) - rv = app.get('/release/{}'.format(r1edit.ident)) + rv = app.get("/release/{}".format(r1edit.ident)) assert rv.status_code == 200 - assert b'BibTeX' not in rv.data + assert b"BibTeX" not in rv.data with pytest.raises(ValueError): - rv = app.get('/release/{}.bib'.format(r1edit.ident)) + rv = app.get("/release/{}.bib".format(r1edit.ident)) # create release can have citeproc run on it (no authors) eg = quick_eg(api) r2 = ReleaseEntity( title="some title again", contribs=[ - ReleaseContrib( - given_name="Paul", - surname="Otlet"), + ReleaseContrib(given_name="Paul", surname="Otlet"), ], ext_ids=ReleaseExtIds(), ) r2edit = api.create_release(eg.editgroup_id, r2) api.accept_editgroup(eg.editgroup_id) - rv = app.get('/release/{}'.format(r2edit.ident)) + rv = app.get("/release/{}".format(r2edit.ident)) assert rv.status_code == 200 - assert b'BibTeX' in rv.data - rv = app.get('/release/{}.bib'.format(r2edit.ident)) + assert b"BibTeX" in rv.data + rv = app.get("/release/{}.bib".format(r2edit.ident)) assert rv.status_code == 200 diff --git a/python/tests/web_coverage.py b/python/tests/web_coverage.py index 0dbf3df8..7e84f11e 100644 --- a/python/tests/web_coverage.py +++ b/python/tests/web_coverage.py @@ -1,4 +1,3 @@ - import datetime import json @@ -11,56 +10,59 @@ def test_container_coverage(app, mocker): # preservation by type histogram elastic_resp1 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 
5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'type_preservation': { - 'buckets': [ - {'key': {'release_type': 'article-journal', 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'release_type': 'book', 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "type_preservation": { + "buckets": [ + { + "key": {"release_type": "article-journal", "preservation": "bright"}, + "doc_count": 444, + }, + {"key": {"release_type": "book", "preservation": "dark"}, "doc_count": 111}, + ], + "sum_other_doc_count": 0, }, }, } # preservation by year histogram elastic_resp2 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'year_preservation': { - 'buckets': [ - {'key': {'year': 2004.0, 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'year': 2005.0, 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "year_preservation": { + "buckets": [ + {"key": {"year": 2004.0, "preservation": "bright"}, "doc_count": 444}, + {"key": {"year": 2005.0, "preservation": "dark"}, "doc_count": 111}, + ], + "sum_other_doc_count": 0, }, }, } # preservation by volume histogram elastic_resp3 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'volume_preservation': { - 'buckets': [ - {'key': {'volume': "12", 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'volume': "12", 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "volume_preservation": { + "buckets": [ + {"key": {"volume": "12", "preservation": "bright"}, "doc_count": 444}, + {"key": {"volume": "12", "preservation": "dark"}, "doc_count": 111}, + ], + "sum_other_doc_count": 0, }, }, } - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ # status (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), @@ -68,23 +70,23 @@ def test_container_coverage(app, mocker): (200, {}, json.dumps(elastic_resp1)), ] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/coverage') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/coverage") assert rv.status_code == 200 es_raw.side_effect = [(200, {}, json.dumps(elastic_resp2))] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.svg') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.svg") assert rv.status_code == 200 es_raw.side_effect = [(200, {}, json.dumps(elastic_resp2))] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.json') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.json") assert rv.status_code == 200 
es_raw.side_effect = [(200, {}, json.dumps(elastic_resp3))] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.svg') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.svg") assert rv.status_code == 200 es_raw.side_effect = [(200, {}, json.dumps(elastic_resp3))] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.json') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.json") assert rv.status_code == 200 @@ -92,34 +94,37 @@ def test_coverage_search(app, mocker): # preservation by year histogram elastic_resp1 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'year_preservation': { - 'buckets': [ - {'key': {'year': 2004.0, 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'year': 2005.0, 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "year_preservation": { + "buckets": [ + {"key": {"year": 2004.0, "preservation": "bright"}, "doc_count": 444}, + {"key": {"year": 2005.0, "preservation": "dark"}, "doc_count": 111}, + ], + "sum_other_doc_count": 0, }, }, } # preservation by type histogram elastic_resp2 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'type_preservation': { - 'buckets': [ - {'key': {'release_type': 'article-journal', 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'release_type': 'book', 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "type_preservation": { + "buckets": [ + { + "key": {"release_type": "article-journal", "preservation": "bright"}, + "doc_count": 444, + }, + {"key": {"release_type": "book", "preservation": "dark"}, "doc_count": 111}, + ], + "sum_other_doc_count": 0, }, }, } @@ -127,22 +132,28 @@ def test_coverage_search(app, mocker): # preservation by date histogram today = str(datetime.date.today()) elastic_resp3 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'date_preservation': { - 'buckets': [ - {'key': {'date': f'{today}T00:00.000Z', 'preservation': 'bright'}, 'doc_count': 444}, - {'key': {'date': f'{today}T00:00.000Z', 'preservation': 'dark'}, 'doc_count': 111}, - ], - 'sum_other_doc_count': 0, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "date_preservation": { + "buckets": [ + { + "key": {"date": f"{today}T00:00.000Z", "preservation": "bright"}, + "doc_count": 444, + }, + { + "key": {"date": f"{today}T00:00.000Z", "preservation": "dark"}, + "doc_count": 111, + }, + ], + "sum_other_doc_count": 0, }, }, } - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = 
mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ # counts summary (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), @@ -152,7 +163,7 @@ def test_coverage_search(app, mocker): (200, {}, json.dumps(elastic_resp1)), ] - rv = app.get('/coverage/search?q=*') + rv = app.get("/coverage/search?q=*") assert rv.status_code == 200 es_raw.side_effect = [ @@ -164,7 +175,7 @@ def test_coverage_search(app, mocker): (200, {}, json.dumps(elastic_resp3)), ] - rv = app.get('/coverage/search?recent=1&q=*') + rv = app.get("/coverage/search?recent=1&q=*") assert rv.status_code == 200 @@ -172,61 +183,62 @@ def test_legacy_container_coverage(app, mocker): # legacy preservation by year elastic_resp1 = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': { - 'year_in_ia': { - 'after_key': {'year': 2020.0, 'in_ia': True}, - 'buckets': [ - {'key': {'year': 2004.0, 'in_ia': False}, 'doc_count': 4}, - {'key': {'year': 2004.0, 'in_ia': True}, 'doc_count': 68}, - {'key': {'year': 2005.0, 'in_ia': False}, 'doc_count': 26}, - {'key': {'year': 2005.0, 'in_ia': True}, 'doc_count': 428}, - {'key': {'year': 2006.0, 'in_ia': False}, 'doc_count': 14}, - {'key': {'year': 2006.0, 'in_ia': True}, 'doc_count': 487}, - {'key': {'year': 2007.0, 'in_ia': False}, 'doc_count': 13}, - {'key': {'year': 2007.0, 'in_ia': True}, 'doc_count': 345}, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "year_in_ia": { + "after_key": {"year": 2020.0, "in_ia": True}, + "buckets": [ + {"key": {"year": 2004.0, "in_ia": False}, "doc_count": 4}, + {"key": {"year": 2004.0, "in_ia": True}, "doc_count": 68}, + {"key": {"year": 2005.0, "in_ia": False}, "doc_count": 26}, + {"key": {"year": 2005.0, "in_ia": True}, "doc_count": 428}, + {"key": {"year": 2006.0, "in_ia": False}, "doc_count": 14}, + {"key": {"year": 2006.0, "in_ia": True}, "doc_count": 487}, + {"key": {"year": 2007.0, "in_ia": False}, "doc_count": 13}, + {"key": {"year": 2007.0, "in_ia": True}, "doc_count": 345}, ], }, }, } - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp1)), ] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.json') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.json") assert rv.status_code == 200 es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp1)), ] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg") assert rv.status_code == 200 def test_coverage_empty_years(app, mocker): elastic_resp = { - 'took': 294, - 'timed_out': False, - '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, - 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []}, - 'aggregations': {'year_in_ia': { - 'after_key': {'year': 2020.0, 'in_ia': True}, - 'buckets': [ - ], - }}, + "took": 294, + "timed_out": False, + "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0}, + "hits": {"total": 4327, "max_score": 0.0, "hits": []}, + "aggregations": { + "year_in_ia": { + "after_key": {"year": 2020.0, "in_ia": True}, + "buckets": [], + } 
+ }, } - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp)), ] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg") assert rv.status_code == 200 diff --git a/python/tests/web_editgroup.py b/python/tests/web_editgroup.py index 20dc8d93..62a5df2e 100644 --- a/python/tests/web_editgroup.py +++ b/python/tests/web_editgroup.py @@ -1,59 +1,60 @@ - from fixtures import * def test_editgroup_basics(app): - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaae') + rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaae") assert rv.status_code == 200 - rv = app.get('/editgroup/ccccccccccccccccccccccccca') + rv = app.get("/editgroup/ccccccccccccccccccccccccca") assert rv.status_code == 404 - rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaae') + rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaae") assert rv.status_code == 200 - rv = app.get('/editor/ccccccccccccccccccccccccca') + rv = app.get("/editor/ccccccccccccccccccccccccca") assert rv.status_code == 404 - rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaae/editgroups') + rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaae/editgroups") assert rv.status_code == 200 - rv = app.get('/editor/ccccccccccccccccccccccccca/editgroups') + rv = app.get("/editor/ccccccccccccccccccccccccca/editgroups") assert rv.status_code == 404 - rv = app.get('/changelog') + rv = app.get("/changelog") assert rv.status_code == 200 - rv = app.get('/changelog/1') + rv = app.get("/changelog/1") assert rv.status_code == 200 - rv = app.get('/changelog/1.3') + rv = app.get("/changelog/1.3") assert rv.status_code == 404 - rv = app.get('/changelog/9999999') + rv = app.get("/changelog/9999999") assert rv.status_code == 404 - rv = app.get('/reviewable') + rv = app.get("/reviewable") assert rv.status_code == 200 + def test_editgroup_annotations(app): - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4') + rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4") assert rv.status_code == 200 - assert b'updated with changes, please re-review' in rv.data - assert b'concerns about this edit...' in rv.data - assert b'Admin' in rv.data - assert b'demo-user' in rv.data - assert b'claire' in rv.data - assert b'Signup' in rv.data - assert b'Add Comment' not in rv.data + assert b"updated with changes, please re-review" in rv.data + assert b"concerns about this edit..." in rv.data + assert b"Admin" in rv.data + assert b"demo-user" in rv.data + assert b"claire" in rv.data + assert b"Signup" in rv.data + assert b"Add Comment" not in rv.data - rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaaq/annotations') + rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaaq/annotations") assert rv.status_code == 200 - assert b'updated with changes, please re-review' not in rv.data - assert b'concerns about this edit...' in rv.data - assert b'Admin' not in rv.data - assert b'claire' in rv.data - assert b'aaaaaaaaaaaabo53aaaaaaaaa4' in rv.data + assert b"updated with changes, please re-review" not in rv.data + assert b"concerns about this edit..." 
in rv.data + assert b"Admin" not in rv.data + assert b"claire" in rv.data + assert b"aaaaaaaaaaaabo53aaaaaaaaa4" in rv.data + def test_editgroup_annotations_login(app_admin): # if logged in, should see form - rv = app_admin.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4') + rv = app_admin.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4") assert rv.status_code == 200 - assert b'Signup' not in rv.data - assert b'Add Comment' in rv.data + assert b"Signup" not in rv.data + assert b"Add Comment" in rv.data diff --git a/python/tests/web_editing.py b/python/tests/web_editing.py index fb8b3f93..cbde095d 100644 --- a/python/tests/web_editing.py +++ b/python/tests/web_editing.py @@ -1,4 +1,3 @@ - from fixtures import * @@ -6,87 +5,97 @@ def test_web_release_create_accept(app_admin, api): eg = quick_eg(api) - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - assert b'Release Edits (0)' in rv.data + assert b"Release Edits (0)" in rv.data # bogus/bad submit - rv = app_admin.post('/release/create', + rv = app_admin.post( + "/release/create", data={ - 'editgroup_id': eg.editgroup_id, - 'release_type': 'badmojo', - 'release_stage': 'published', - 'title': 'something bogus', + "editgroup_id": eg.editgroup_id, + "release_type": "badmojo", + "release_stage": "published", + "title": "something bogus", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 # Does not insert bad choices into drop-down - #assert b'badmojo' in rv.data - assert b'Not a valid choice' in rv.data + # assert b'badmojo' in rv.data + assert b"Not a valid choice" in rv.data # bad wikidata QID - rv = app_admin.post('/release/create', + rv = app_admin.post( + "/release/create", data={ - 'editgroup_id': eg.editgroup_id, - 'release_type': 'article-journal', - 'release_stage': 'published', - 'title': 'something bogus', - 'wikidata_qid': '884', + "editgroup_id": eg.editgroup_id, + "release_type": "article-journal", + "release_stage": "published", + "title": "something bogus", + "wikidata_qid": "884", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 # ok/valid submit - rv = app_admin.post('/release/create', + rv = app_admin.post( + "/release/create", data={ - 'editgroup_id': eg.editgroup_id, - 'release_type': 'article-journal', - 'release_stage': 'published', - 'title': 'something bogus', - 'doi': '10.1234/999999', + "editgroup_id": eg.editgroup_id, + "release_type": "article-journal", + "release_stage": "published", + "title": "something bogus", + "doi": "10.1234/999999", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 - assert b'10.1234/999999' in rv.data + assert b"10.1234/999999" in rv.data - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - assert b'Release Edits (1)' in rv.data + assert b"Release Edits (1)" in rv.data - rv = app_admin.post('/editgroup/{}/submit'.format(eg.editgroup_id), + rv = app_admin.post( + "/editgroup/{}/submit".format(eg.editgroup_id), follow_redirects=True, ) assert rv.status_code == 200 - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - assert b'Submitted' in rv.data - assert b'None!' in rv.data + assert b"Submitted" in rv.data + assert b"None!" 
in rv.data - rv = app_admin.post('/editgroup/{}/unsubmit'.format(eg.editgroup_id), + rv = app_admin.post( + "/editgroup/{}/unsubmit".format(eg.editgroup_id), follow_redirects=True, ) assert rv.status_code == 200 - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - rv = app_admin.post('/editgroup/{}/annotation'.format(eg.editgroup_id), + rv = app_admin.post( + "/editgroup/{}/annotation".format(eg.editgroup_id), data={ - 'comment_markdown': "This is an **example** of markdown in a test annotation", + "comment_markdown": "This is an **example** of markdown in a test annotation", }, follow_redirects=True, ) assert rv.status_code == 200 - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - assert b'<p>This is an <strong>example</strong> of markdown in a test annotation' in rv.data + assert b"<p>This is an <strong>example</strong> of markdown in a test annotation" in rv.data - rv = app_admin.post('/editgroup/{}/accept'.format(eg.editgroup_id), + rv = app_admin.post( + "/editgroup/{}/accept".format(eg.editgroup_id), follow_redirects=True, ) assert rv.status_code == 200 - rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id)) + rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id)) assert rv.status_code == 200 - assert b'Merged' in rv.data + assert b"Merged" in rv.data def test_web_container_create(app_admin, api): @@ -94,20 +103,24 @@ def test_web_container_create(app_admin, api): eg = quick_eg(api) # bogus/bad submit - rv = app_admin.post('/container/create', + rv = app_admin.post( + "/container/create", data={ - 'editgroup_id': eg.editgroup_id, + "editgroup_id": eg.editgroup_id, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 # ok/valid submit - rv = app_admin.post('/container/create', + rv = app_admin.post( + "/container/create", data={ - 'editgroup_id': eg.editgroup_id, - 'name': 'blah blah journal blah', + "editgroup_id": eg.editgroup_id, + "name": "blah blah journal blah", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 @@ -116,144 +129,167 @@ def test_web_file_create(app_admin, api): eg = quick_eg(api) # bogus/bad submit - rv = app_admin.post('/file/create', + rv = app_admin.post( + "/file/create", data={ - 'editgroup_id': eg.editgroup_id, + "editgroup_id": eg.editgroup_id, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 # ok/valid submit - rv = app_admin.post('/file/create', + rv = app_admin.post( + "/file/create", data={ - 'editgroup_id': eg.editgroup_id, - 'size': '12345', - 'sha1': '45be56a396c4d03faaa41e055170c23534dec736', + "editgroup_id": eg.editgroup_id, + "size": "12345", + "sha1": "45be56a396c4d03faaa41e055170c23534dec736", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 # upper-case SHA-1 - rv = app_admin.post('/file/create', + rv = app_admin.post( + "/file/create", data={ - 'editgroup_id': eg.editgroup_id, - 'size': '12345', - 'sha1': '45BE56A396C4D03FAAA41E055170C23534DEC736', + "editgroup_id": eg.editgroup_id, + "size": "12345", + "sha1": "45BE56A396C4D03FAAA41E055170C23534DEC736", }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 + def test_web_file_toml_create(app_admin, api): eg = quick_eg(api) # bogus/bad submit - rv = app_admin.post('/file/create/toml', + rv = app_admin.post( 
+ "/file/create/toml", data={ - 'editgroup_id': eg.editgroup_id, + "editgroup_id": eg.editgroup_id, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 # ok/valid submit - rv = app_admin.post('/file/create/toml', + rv = app_admin.post( + "/file/create/toml", data={ - 'editgroup_id': eg.editgroup_id, - 'toml': """ + "editgroup_id": eg.editgroup_id, + "toml": """ size = 12345 sha1 = "45be56a396c4d03faaa41e055170c23534dec736" """, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 # upper-case SHA-1 - rv = app_admin.post('/file/create/toml', + rv = app_admin.post( + "/file/create/toml", data={ - 'editgroup_id': eg.editgroup_id, - 'toml': """ + "editgroup_id": eg.editgroup_id, + "toml": """ size = 12345 sha1 = "45BE56A396C4D03FAAA41E055170C23534DEC736" """, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 400 + def test_web_file_delete(app_admin, api): eg = quick_eg(api) - rv = app_admin.get('/file/aaaaaaaaaaaaamztaaaaaaaaam/delete') + rv = app_admin.get("/file/aaaaaaaaaaaaamztaaaaaaaaam/delete") assert rv.status_code == 200 - rv = app_admin.post('/file/aaaaaaaaaaaaamztaaaaaaaaam/delete', + rv = app_admin.post( + "/file/aaaaaaaaaaaaamztaaaaaaaaam/delete", data={ - 'editgroup_id': eg.editgroup_id, + "editgroup_id": eg.editgroup_id, }, - follow_redirects=True) + follow_redirects=True, + ) assert rv.status_code == 200 # NOTE: did not *accept* the deletion edit + DUMMY_DEMO_ENTITIES = { - 'container': 'aaaaaaaaaaaaaeiraaaaaaaaam', - 'creator': 'aaaaaaaaaaaaaircaaaaaaaaaq', - 'file': 'aaaaaaaaaaaaamztaaaaaaaaam', - 'fileset': 'aaaaaaaaaaaaaztgaaaaaaaaai', - 'webcapture': 'aaaaaaaaaaaaa53xaaaaaaaaai', - 'release': 'aaaaaaaaaaaaarceaaaaaaaaai', - 'work': 'aaaaaaaaaaaaavkvaaaaaaaaai', + "container": "aaaaaaaaaaaaaeiraaaaaaaaam", + "creator": "aaaaaaaaaaaaaircaaaaaaaaaq", + "file": "aaaaaaaaaaaaamztaaaaaaaaam", + "fileset": "aaaaaaaaaaaaaztgaaaaaaaaai", + "webcapture": "aaaaaaaaaaaaa53xaaaaaaaaai", + "release": "aaaaaaaaaaaaarceaaaaaaaaai", + "work": "aaaaaaaaaaaaavkvaaaaaaaaai", } + def test_web_edit_get(app_admin): # these are all existing entities - for entity_type in ['release', 'file', 'container']: - rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit') + for entity_type in ["release", "file", "container"]: + rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit") assert rv.status_code == 200 - if entity_type == 'release': - assert b'A bigger example' in rv.data - elif entity_type == 'file': - assert b'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362' in rv.data - elif entity_type == 'container': - assert b'1549-1277' in rv.data - - rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml') + if entity_type == "release": + assert b"A bigger example" in rv.data + elif entity_type == "file": + assert ( + b"ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" in rv.data + ) + elif entity_type == "container": + assert b"1549-1277" in rv.data + + rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml") assert rv.status_code == 200 - if entity_type == 'release': - assert b'A bigger example' in rv.data - elif entity_type == 'file': - assert b'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362' in rv.data - elif entity_type == 'container': - assert b'1549-1277' in rv.data + if entity_type == "release": + assert b"A bigger example" in rv.data + elif entity_type == 
"file": + assert ( + b"ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" in rv.data + ) + elif entity_type == "container": + assert b"1549-1277" in rv.data # TOML-only endpoints - for entity_type in ['creator', 'fileset', 'webcapture', 'work']: - rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit') + for entity_type in ["creator", "fileset", "webcapture", "work"]: + rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit") assert rv.status_code == 302 - rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml') + rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml") assert rv.status_code == 200 def test_web_create_get(app_admin): - for entity_type in ['release', 'file', 'container']: - rv = app_admin.get(f'/{entity_type}/create') + for entity_type in ["release", "file", "container"]: + rv = app_admin.get(f"/{entity_type}/create") assert rv.status_code == 200 - rv = app_admin.get(f'/{entity_type}/create/toml') + rv = app_admin.get(f"/{entity_type}/create/toml") assert rv.status_code == 200 # these are TOML only - for entity_type in ['creator', 'fileset', 'webcapture', 'work']: - rv = app_admin.get(f'/{entity_type}/create') + for entity_type in ["creator", "fileset", "webcapture", "work"]: + rv = app_admin.get(f"/{entity_type}/create") assert rv.status_code == 302 - rv = app_admin.get(f'/{entity_type}/create/toml') + rv = app_admin.get(f"/{entity_type}/create/toml") assert rv.status_code == 200 + def test_web_edit_delete(app_admin): for entity_type in DUMMY_DEMO_ENTITIES.keys(): - rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/delete') + rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/delete") assert rv.status_code == 200 diff --git a/python/tests/web_editor.py b/python/tests/web_editor.py index 0d0679bb..b7c249ba 100644 --- a/python/tests/web_editor.py +++ b/python/tests/web_editor.py @@ -1,32 +1,35 @@ - from fixtures import * def test_change_username(app_admin): # these tests aren't supposed to mutate database - rv = app_admin.post('/auth/change_username', data={'username': 'admin-tmp'}, - follow_redirects=True) + rv = app_admin.post( + "/auth/change_username", data={"username": "admin-tmp"}, follow_redirects=True + ) assert rv.status_code == 200 - rv = app_admin.get('/auth/account') - assert b'admin-tmp' in rv.data + rv = app_admin.get("/auth/account") + assert b"admin-tmp" in rv.data - rv = app_admin.post('/auth/change_username', data={'username': 'claire'}, - follow_redirects=True) + rv = app_admin.post( + "/auth/change_username", data={"username": "claire"}, follow_redirects=True + ) assert rv.status_code == 400 - rv = app_admin.get('/auth/account') - assert b'admin-tmp' in rv.data + rv = app_admin.get("/auth/account") + assert b"admin-tmp" in rv.data - rv = app_admin.post('/auth/change_username', data={'username': 'admin'}, - follow_redirects=True) + rv = app_admin.post( + "/auth/change_username", data={"username": "admin"}, follow_redirects=True + ) assert rv.status_code == 200 - rv = app_admin.get('/auth/account') - assert b'admin-tmp' not in rv.data + rv = app_admin.get("/auth/account") + assert b"admin-tmp" not in rv.data + def test_username_redirect(app_admin): - rv = app_admin.get('/u/admin') + rv = app_admin.get("/u/admin") assert rv.status_code == 302 - rv = app_admin.get('/u/bogus-not-registered') + rv = app_admin.get("/u/bogus-not-registered") assert rv.status_code == 404 diff --git a/python/tests/web_entity_views.py 
b/python/tests/web_entity_views.py index 0d51f87e..4a3a89f8 100644 --- a/python/tests/web_entity_views.py +++ b/python/tests/web_entity_views.py @@ -1,4 +1,3 @@ - import json from fixtures import * @@ -6,30 +5,30 @@ from fixtures import * from fatcat_web.forms import ContainerEntityForm, FileEntityForm, ReleaseEntityForm DUMMY_DEMO_ENTITIES = { - 'container': ('aaaaaaaaaaaaaeiraaaaaaaaai', '00000000-0000-0000-1111-fff000000002'), + "container": ("aaaaaaaaaaaaaeiraaaaaaaaai", "00000000-0000-0000-1111-fff000000002"), # note inconsistency here (q not i) - 'creator': ('aaaaaaaaaaaaaircaaaaaaaaaq', '00000000-0000-0000-2222-fff000000002'), - 'file': ('aaaaaaaaaaaaamztaaaaaaaaai', '00000000-0000-0000-3333-fff000000002'), - 'fileset': ('aaaaaaaaaaaaaztgaaaaaaaaai', '00000000-0000-0000-6666-fff000000002'), - 'webcapture': ('aaaaaaaaaaaaa53xaaaaaaaaai', '00000000-0000-0000-7777-fff000000002'), - 'release': ('aaaaaaaaaaaaarceaaaaaaaaai', '00000000-0000-0000-4444-fff000000002'), - 'work': ('aaaaaaaaaaaaavkvaaaaaaaaai', '00000000-0000-0000-5555-fff000000002'), + "creator": ("aaaaaaaaaaaaaircaaaaaaaaaq", "00000000-0000-0000-2222-fff000000002"), + "file": ("aaaaaaaaaaaaamztaaaaaaaaai", "00000000-0000-0000-3333-fff000000002"), + "fileset": ("aaaaaaaaaaaaaztgaaaaaaaaai", "00000000-0000-0000-6666-fff000000002"), + "webcapture": ("aaaaaaaaaaaaa53xaaaaaaaaai", "00000000-0000-0000-7777-fff000000002"), + "release": ("aaaaaaaaaaaaarceaaaaaaaaai", "00000000-0000-0000-4444-fff000000002"), + "work": ("aaaaaaaaaaaaavkvaaaaaaaaai", "00000000-0000-0000-5555-fff000000002"), } REALISTIC_DEMO_ENTITIES = { - 'container': 'aaaaaaaaaaaaaeiraaaaaaaaam', - 'creator': 'aaaaaaaaaaaaaircaaaaaaaaam', - 'file': 'aaaaaaaaaaaaamztaaaaaaaaam', - 'fileset': 'aaaaaaaaaaaaaztgaaaaaaaaam', - 'webcapture': 'aaaaaaaaaaaaa53xaaaaaaaaam', - 'release': 'aaaaaaaaaaaaarceaaaaaaaaam', - 'work': 'aaaaaaaaaaaaavkvaaaaaaaaam', + "container": "aaaaaaaaaaaaaeiraaaaaaaaam", + "creator": "aaaaaaaaaaaaaircaaaaaaaaam", + "file": "aaaaaaaaaaaaamztaaaaaaaaam", + "fileset": "aaaaaaaaaaaaaztgaaaaaaaaam", + "webcapture": "aaaaaaaaaaaaa53xaaaaaaaaam", + "release": "aaaaaaaaaaaaarceaaaaaaaaam", + "work": "aaaaaaaaaaaaavkvaaaaaaaaam", } def test_entity_basics(app, mocker): - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") # these are basic ES stats for the container view pages es_raw.side_effect = [ (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), @@ -38,40 +37,43 @@ def test_entity_basics(app, mocker): for entity_type, (ident, revision) in DUMMY_DEMO_ENTITIES.items(): # good requests - rv = app.get('/{}/{}'.format(entity_type, ident)) + rv = app.get("/{}/{}".format(entity_type, ident)) assert rv.status_code == 200 - rv = app.get('/{}_{}'.format(entity_type, ident)) + rv = app.get("/{}_{}".format(entity_type, ident)) assert rv.status_code == 302 - rv = app.get('/{}/{}/history'.format(entity_type, ident)) + rv = app.get("/{}/{}/history".format(entity_type, ident)) assert rv.status_code == 200 - rv = app.get('/{}/{}/metadata'.format(entity_type, ident)) + rv = app.get("/{}/{}/metadata".format(entity_type, ident)) assert rv.status_code == 200 - rv = app.get('/{}/rev/{}'.format(entity_type, revision)) + rv = app.get("/{}/rev/{}".format(entity_type, revision)) assert rv.status_code == 200 - rv = app.get('/{}/rev/{}_something'.format(entity_type, revision)) + rv = app.get("/{}/rev/{}_something".format(entity_type, revision)) assert 
rv.status_code == 404 - rv = app.get('/{}/rev/{}/metadata'.format(entity_type, revision)) + rv = app.get("/{}/rev/{}/metadata".format(entity_type, revision)) assert rv.status_code == 200 - print('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}'.format(entity_type, ident)) - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}'.format(entity_type, ident)) + print("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}".format(entity_type, ident)) + rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}".format(entity_type, ident)) assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}/metadata'.format(entity_type, ident)) + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}/metadata".format(entity_type, ident) + ) assert rv.status_code == 200 # bad requests - rv = app.get('/{}/9999999999'.format(entity_type)) + rv = app.get("/{}/9999999999".format(entity_type)) assert rv.status_code == 404 - rv = app.get('/{}/9999999999/history'.format(entity_type)) + rv = app.get("/{}/9999999999/history".format(entity_type)) assert rv.status_code == 404 - rv = app.get('/{}/f1f046a3-45c9-ffff-ffff-ffffffffffff'.format(entity_type)) + rv = app.get("/{}/f1f046a3-45c9-ffff-ffff-ffffffffffff".format(entity_type)) assert rv.status_code == 404 - rv = app.get('/{}/rev/f1f046a3-45c9-ffff-ffff-fffffffff'.format(entity_type)) + rv = app.get("/{}/rev/f1f046a3-45c9-ffff-ffff-fffffffff".format(entity_type)) assert rv.status_code == 404 - rv = app.get('/{}/ccccccccccccccccccccccccca'.format(entity_type)) + rv = app.get("/{}/ccccccccccccccccccccccccca".format(entity_type)) assert rv.status_code == 404 # TODO: redirects and deleted entities + def test_web_deleted_release(app, api): # specific regression test for view of a deleted release @@ -91,109 +93,113 @@ def test_web_deleted_release(app, api): r2 = api.get_release(r1edit.ident) assert r2.state == "deleted" - rv = app.get('/release/{}'.format(r2.ident)) + rv = app.get("/release/{}".format(r2.ident)) assert rv.status_code == 200 - rv = app.get('/release/{}/metadata'.format(r2.ident)) + rv = app.get("/release/{}/metadata".format(r2.ident)) assert rv.status_code == 200 - rv = app.get('/release/{}/history'.format(r2.ident)) + rv = app.get("/release/{}/history".format(r2.ident)) assert rv.status_code == 200 def test_lookups(app): - rv = app.get('/container/lookup') + rv = app.get("/container/lookup") assert rv.status_code == 200 - rv = app.get('/container/lookup?issnl=9999-9999') + rv = app.get("/container/lookup?issnl=9999-9999") assert rv.status_code == 404 - rv = app.get('/container/lookup?issnl=1234-5678') + rv = app.get("/container/lookup?issnl=1234-5678") assert rv.status_code == 302 - rv = app.get('/creator/lookup') + rv = app.get("/creator/lookup") assert rv.status_code == 200 - rv = app.get('/creator/lookup?orcid=0000-0003-2088-7465') + rv = app.get("/creator/lookup?orcid=0000-0003-2088-7465") assert rv.status_code == 302 - rv = app.get('/creator/lookup?orcid=0000-0003-2088-0000') + rv = app.get("/creator/lookup?orcid=0000-0003-2088-0000") assert rv.status_code == 404 - rv = app.get('/file/lookup') + rv = app.get("/file/lookup") assert rv.status_code == 200 - rv = app.get('/file/lookup?sha1=7d97e98f8af710c7e7fe703abc8f639e0ee507c4') + rv = app.get("/file/lookup?sha1=7d97e98f8af710c7e7fe703abc8f639e0ee507c4") assert rv.status_code == 302 - rv = app.get('/file/lookup?sha1=7d97e98f8af710c7e7f00000000000000ee507c4') + rv = app.get("/file/lookup?sha1=7d97e98f8af710c7e7f00000000000000ee507c4") assert rv.status_code == 404 - rv = 
app.get('/fileset/lookup') + rv = app.get("/fileset/lookup") assert rv.status_code == 404 - rv = app.get('/webcapture/lookup') + rv = app.get("/webcapture/lookup") assert rv.status_code == 404 - rv = app.get('/release/lookup') + rv = app.get("/release/lookup") assert rv.status_code == 200 - rv = app.get('/release/lookup?doi=10.123/abc') + rv = app.get("/release/lookup?doi=10.123/abc") assert rv.status_code == 302 - rv = app.get('/release/lookup?doi=10.123%2Fabc') + rv = app.get("/release/lookup?doi=10.123%2Fabc") assert rv.status_code == 302 - rv = app.get('/release/lookup?doi=abcde') + rv = app.get("/release/lookup?doi=abcde") assert rv.status_code == 400 - rv = app.get('/release/lookup?doi=10.1234/uuu') + rv = app.get("/release/lookup?doi=10.1234/uuu") assert rv.status_code == 404 - rv = app.get('/work/lookup') + rv = app.get("/work/lookup") assert rv.status_code == 404 def test_web_container(app, mocker): - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") # these are basic ES stats for the container view pages es_raw.side_effect = [ (200, {}, json.dumps(ES_CONTAINER_STATS_RESP)), (200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)), ] - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata") assert rv.status_code == 200 - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit') + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/container/create') + rv = app.get("/container/create") assert rv.status_code == 302 - rv = app.get('/container/rev/00000000-0000-0000-1111-fff000000002') + rv = app.get("/container/rev/00000000-0000-0000-1111-fff000000002") assert rv.status_code == 200 - rv = app.get('/container/rev/00000000-0000-0000-1111-fff000000002/metadata') + rv = app.get("/container/rev/00000000-0000-0000-1111-fff000000002/metadata") assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai') + rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata') + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata" + ) assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit') + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit" + ) assert rv.status_code == 302 def test_web_container_login(full_app, app_admin): - rv = app_admin.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit') + rv = app_admin.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit") assert rv.status_code == 200 - assert b'Journal of Trivial Results' in rv.data - assert b'1234-5678' in rv.data - rv = app_admin.get('/container/create') + assert b"Journal of Trivial Results" in rv.data + assert b"1234-5678" in rv.data + rv = app_admin.get("/container/create") assert rv.status_code == 200 # creation (via form) with full_app.test_request_context(): form = ContainerEntityForm() form.issnl.data = "invalid-issn" - rv = app_admin.post('/container/create', data=form.data, 
follow_redirects=True) + rv = app_admin.post("/container/create", data=form.data, follow_redirects=True) assert rv.status_code == 400 - assert b'invalid-issn' in rv.data + assert b"invalid-issn" in rv.data with full_app.test_request_context(): form = ContainerEntityForm() # these fields are required form.name.data = "Journal of Experiments" - rv = app_admin.post('/container/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/container/create", data=form.data, follow_redirects=True) assert rv.status_code == 200 assert b"Journal of Experiments" in rv.data @@ -201,139 +207,148 @@ def test_web_container_login(full_app, app_admin): with full_app.test_request_context(): form = ContainerEntityForm() form.issnl.data = "invalid-issn" - rv = app_admin.post('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit', - data=form.data, follow_redirects=True) + rv = app_admin.post( + "/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit", data=form.data, follow_redirects=True + ) assert rv.status_code == 400 - assert b'invalid-issn' in rv.data + assert b"invalid-issn" in rv.data def test_web_creator(app): # not logged in - rv = app.get('/creator/aaaaaaaaaaaaaircaaaaaaaaai') + rv = app.get("/creator/aaaaaaaaaaaaaircaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/creator/aaaaaaaaaaaaaircaaaaaaaaai/edit') + rv = app.get("/creator/aaaaaaaaaaaaaircaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/creator/create') + rv = app.get("/creator/create") assert rv.status_code == 302 def test_web_file(app): # not logged in - rv = app.get('/file/aaaaaaaaaaaaamztaaaaaaaaai') + rv = app.get("/file/aaaaaaaaaaaaamztaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit') + rv = app.get("/file/aaaaaaaaaaaaamztaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/file/create') + rv = app.get("/file/create") assert rv.status_code == 302 def test_web_file_login(full_app, app_admin): - rv = app_admin.get('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit') + rv = app_admin.get("/file/aaaaaaaaaaaaamztaaaaaaaaai/edit") assert rv.status_code == 200 - assert b'7d97e98f8af710c7e7fe703abc8f639e0ee507c4' in rv.data - assert b'archive.org/robots.txt' in rv.data - rv = app_admin.get('/file/create') + assert b"7d97e98f8af710c7e7fe703abc8f639e0ee507c4" in rv.data + assert b"archive.org/robots.txt" in rv.data + rv = app_admin.get("/file/create") assert rv.status_code == 200 # creation (via form) with full_app.test_request_context(): form = FileEntityForm() form.sha1.data = "invalidstring" - rv = app_admin.post('/file/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/file/create", data=form.data, follow_redirects=True) assert rv.status_code == 400 - assert b'invalidstring' in rv.data + assert b"invalidstring" in rv.data with full_app.test_request_context(): form = FileEntityForm() # these fields are required form.size.data = 1234 form.sha1.data = "202f899638fcaa97128b968a43a8f45e00b69a25" - rv = app_admin.post('/file/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/file/create", data=form.data, follow_redirects=True) assert rv.status_code == 200 # editing (via form) with full_app.test_request_context(): form = FileEntityForm() form.md5.data = "invalidstring" - rv = app_admin.post('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit', - data=form.data, follow_redirects=True) + rv = app_admin.post( + "/file/aaaaaaaaaaaaamztaaaaaaaaai/edit", data=form.data, follow_redirects=True + ) assert rv.status_code == 400 - assert b'invalidstring' in rv.data + 
assert b"invalidstring" in rv.data + def test_web_fileset(app): # not logged in - rv = app.get('/fileset/aaaaaaaaaaaaaztgaaaaaaaaai') + rv = app.get("/fileset/aaaaaaaaaaaaaztgaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/fileset/aaaaaaaaaaaaaztgaaaaaaaaai/edit') + rv = app.get("/fileset/aaaaaaaaaaaaaztgaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/fileset/create') + rv = app.get("/fileset/create") assert rv.status_code == 302 def test_web_webcatpure(app): # not logged in - rv = app.get('/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai') + rv = app.get("/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai/edit') + rv = app.get("/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/webcapture/create') + rv = app.get("/webcapture/create") assert rv.status_code == 302 def test_web_release(app): # not logged in - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs") assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/references') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/references") assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata") assert rv.status_code == 200 - rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/contribs') + rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/contribs") assert rv.status_code == 200 - rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/references') + rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/references") assert rv.status_code == 200 - rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/metadata') + rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/metadata") assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai') + rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs') + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs" + ) assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/references') + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/references" + ) assert rv.status_code == 200 - rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata') + rv = app.get( + "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata" + ) assert rv.status_code == 200 - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/edit') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/release/create') + rv = app.get("/release/create") assert rv.status_code == 302 def test_web_release_login(full_app, app_admin): - rv = app_admin.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/edit') + rv = app_admin.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/edit") assert rv.status_code == 200 - assert b'robin hood' in rv.data - assert b'PMC555' in rv.data - rv = 
app_admin.get('/release/create') + assert b"robin hood" in rv.data + assert b"PMC555" in rv.data + rv = app_admin.get("/release/create") assert rv.status_code == 200 # creation (via form) with full_app.test_request_context(): form = ReleaseEntityForm() form.title.data = "My Research: Missing Some Stuff" - rv = app_admin.post('/release/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/release/create", data=form.data, follow_redirects=True) assert rv.status_code == 400 - assert b'My Research: Missing Some Stuff' in rv.data - assert b'already' not in rv.data + assert b"My Research: Missing Some Stuff" in rv.data + assert b"already" not in rv.data with full_app.test_request_context(): form = ReleaseEntityForm() @@ -341,7 +356,7 @@ def test_web_release_login(full_app, app_admin): form.title.data = "Creating Releases: A Review" form.release_type.data = "article-journal" form.release_stage.data = "draft" - rv = app_admin.post('/release/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/release/create", data=form.data, follow_redirects=True) assert rv.status_code == 200 with full_app.test_request_context(): @@ -352,33 +367,33 @@ def test_web_release_login(full_app, app_admin): form.release_stage.data = "draft" # already merged editgroup form.editgroup_id.data = "aaaaaaaaaaaabo53aaaaaaaaae" - rv = app_admin.post('/release/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/release/create", data=form.data, follow_redirects=True) assert rv.status_code == 400 # XXX: this should return the page with error annotated, not generic # 400 page - #assert b"already accepted" in rv.data + # assert b"already accepted" in rv.data # editing with full_app.test_request_context(): form = ReleaseEntityForm() form.title.data = "My Research: Missing Some Stuff" form.release_type.data = "bogus-release-type" - rv = app_admin.post('/release/create', data=form.data, follow_redirects=True) + rv = app_admin.post("/release/create", data=form.data, follow_redirects=True) assert rv.status_code == 400 - assert b'My Research: Missing Some Stuff' in rv.data + assert b"My Research: Missing Some Stuff" in rv.data def test_web_search(app): - rv = app.get('/release/search') + rv = app.get("/release/search") assert rv.status_code == 200 def test_web_work(app): - rv = app.get('/work/aaaaaaaaaaaaavkvaaaaaaaaai') + rv = app.get("/work/aaaaaaaaaaaaavkvaaaaaaaaai") assert rv.status_code == 200 - rv = app.get('/work/aaaaaaaaaaaaavkvaaaaaaaaai/edit') + rv = app.get("/work/aaaaaaaaaaaaavkvaaaaaaaaai/edit") assert rv.status_code == 302 - rv = app.get('/work/create') + rv = app.get("/work/create") assert rv.status_code == 302 diff --git a/python/tests/web_refs.py b/python/tests/web_refs.py index 0ab69607..61966480 100644 --- a/python/tests/web_refs.py +++ b/python/tests/web_refs.py @@ -1,4 +1,3 @@ - import json import pytest @@ -10,14 +9,14 @@ from fatcat_web.search import get_elastic_container_random_releases def test_basic_refs(app, mocker): - with open('tests/files/elastic_refs_in_release.json') as f: + with open("tests/files/elastic_refs_in_release.json") as f: elastic_resp_in = json.loads(f.read()) - with open('tests/files/elastic_refs_out_release.json') as f: + with open("tests/files/elastic_refs_out_release.json") as f: elastic_resp_out = json.loads(f.read()) - with open('tests/files/elastic_empty.json') as f: + with open("tests/files/elastic_empty.json") as f: elastic_resp_empty = json.loads(f.read()) - es_raw = 
mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp_in)), (200, {}, json.dumps(elastic_resp_in)), @@ -28,27 +27,27 @@ def test_basic_refs(app, mocker): ] # render refs-in - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in") assert rv.status_code == 200 assert b"Why Most Published Research Findings Are False" in rv.data - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json") assert rv.status_code == 200 # empty (in) - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in") assert rv.status_code == 200 assert b"No References Found" in rv.data # render refs-out - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out") assert rv.status_code == 200 assert b"Why Most Published Research Findings Are False" in rv.data - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json") assert rv.status_code == 200 # empty (out) - rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out') + rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out") assert rv.status_code == 200 assert b"No References Found" in rv.data diff --git a/python/tests/web_routes.py b/python/tests/web_routes.py index 0edf06d1..3c4ad946 100644 --- a/python/tests/web_routes.py +++ b/python/tests/web_routes.py @@ -1,10 +1,8 @@ - from fixtures import * def test_static_routes(app): - for route in ('/health.json', '/robots.txt', '/', '/about', '/rfc', - '/static/fatcat.jpg'): + for route in ("/health.json", "/robots.txt", "/", "/about", "/rfc", "/static/fatcat.jpg"): rv = app.get(route) assert rv.status_code == 200 diff --git a/python/tests/web_search.py b/python/tests/web_search.py index fdcffdea..47cd4057 100644 --- a/python/tests/web_search.py +++ b/python/tests/web_search.py @@ -1,4 +1,3 @@ - import json import pytest @@ -10,68 +9,71 @@ from fatcat_web.search import get_elastic_container_random_releases def test_generic_search(app): - rv = app.get('/search?q=blood') + rv = app.get("/search?q=blood") assert rv.status_code == 302 assert "/release/search" in rv.location # file sha1sum - rv = app.get('/search?q=0262d5351e8e7a0af27af8ceaf7b4e581da085f2') + rv = app.get("/search?q=0262d5351e8e7a0af27af8ceaf7b4e581da085f2") assert rv.status_code == 302 assert "/file/lookup" in rv.location # PMCID - rv = app.get('/search?q=PMC12345') + rv = app.get("/search?q=PMC12345") assert rv.status_code == 302 assert "/release/lookup" in rv.location # ISSN - rv = app.get('/search?q=1234-5678') + rv = app.get("/search?q=1234-5678") assert rv.status_code == 302 assert "/container/lookup" in rv.location + def test_release_search(app, mocker): - rv = app.get('/release/search') + rv = app.get("/release/search") assert rv.status_code == 200 - with open('tests/files/elastic_release_search.json') as f: - elastic_resp=json.loads(f.read()) + with open("tests/files/elastic_release_search.json") as f: + elastic_resp = json.loads(f.read()) - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ 
(200, {}, json.dumps(elastic_resp)), ] - rv = app.get('/release/search?q=blood') + rv = app.get("/release/search?q=blood") assert rv.status_code == 200 assert b"Showing" in rv.data assert b"Quantum Studies of Acetylene Adsorption on Ice Surface" in rv.data + def test_container_search(app, mocker): - rv = app.get('/container/search') + rv = app.get("/container/search") assert rv.status_code == 200 - with open('tests/files/elastic_container_search.json') as f: - elastic_resp=json.loads(f.read()) + with open("tests/files/elastic_container_search.json") as f: + elastic_resp = json.loads(f.read()) - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp)), ] - rv = app.get('/container/search?q=blood') + rv = app.get("/container/search?q=blood") assert rv.status_code == 200 assert b"Showing" in rv.data assert b"European Instructional Course Lectures" in rv.data assert b"British Editorial Society of Bone and Joint Surger" in rv.data + def test_random_releases(app, mocker): - with open('tests/files/elastic_release_search.json') as f: - elastic_resp=json.loads(f.read()) + with open("tests/files/elastic_release_search.json") as f: + elastic_resp = json.loads(f.read()) - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp)), ] @@ -81,101 +83,120 @@ def test_random_releases(app, mocker): elastic_resp1 = { - 'timed_out': False, - 'aggregations': { - 'release_ref_count': {'value': 8031459}}, - 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 0 + "timed_out": False, + "aggregations": {"release_ref_count": {"value": 8031459}}, + "hits": {"total": 80578584, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 0, } elastic_resp2 = { - 'timed_out': False, - 'aggregations': { - 'paper_like': {'buckets': { - 'is_oa': {'doc_count': 8031459}, - 'in_kbart': {'doc_count': 51594200}, - 'in_web': {'doc_count': 10925092}, - 'in_web_not_kbart': {'doc_count': 5160359}}}}, - 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 0 + "timed_out": False, + "aggregations": { + "paper_like": { + "buckets": { + "is_oa": {"doc_count": 8031459}, + "in_kbart": {"doc_count": 51594200}, + "in_web": {"doc_count": 10925092}, + "in_web_not_kbart": {"doc_count": 5160359}, + } + } + }, + "hits": {"total": 80578584, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 0, } elastic_resp3 = { - 'timed_out': False, - 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 0 + "timed_out": False, + "hits": {"total": 80578584, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 0, } + def test_stats(app, mocker): - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp1)), (200, 
{}, json.dumps(elastic_resp2)), (200, {}, json.dumps(elastic_resp3)), ] - rv = app.get('/stats') + rv = app.get("/stats") assert rv.status_code == 200 assert b"80,578,584" in rv.data + def test_stats_json(app, mocker): - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp1)), (200, {}, json.dumps(elastic_resp2)), (200, {}, json.dumps(elastic_resp3)), ] - rv = app.get('/stats.json') + rv = app.get("/stats.json") assert rv.status_code == 200 - assert rv.json['papers']['in_kbart'] == 51594200 - assert rv.json['release']['refs_total'] == 8031459 + assert rv.json["papers"]["in_kbart"] == 51594200 + assert rv.json["release"]["refs_total"] == 8031459 + def test_container_stats(app, mocker): elastic_resp = { - 'timed_out': False, - 'aggregations': { - 'container_stats': {'buckets': { - 'is_preserved': {'doc_count': 461939}, - 'in_kbart': {'doc_count': 461939}, - 'in_web': {'doc_count': 2797}}}, - 'preservation': { - 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, - 'buckets': [{'key': 'bright', 'doc_count': 4143}, {'key': 'none', - 'doc_count': 101}, {'key': 'dark', 'doc_count': 79}, {'key': - 'shadows_only', 'doc_count': 5}]}, - 'release_type': { - 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, - 'buckets': [{'key': 'article-journal', 'doc_count': 4324}, {'key': - 'article', 'doc_count': 2}, {'key': '_unknown', 'doc_count': 1}, - {'key': 'editorial', 'doc_count': 1}]}, + "timed_out": False, + "aggregations": { + "container_stats": { + "buckets": { + "is_preserved": {"doc_count": 461939}, + "in_kbart": {"doc_count": 461939}, + "in_web": {"doc_count": 2797}, + } + }, + "preservation": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + {"key": "bright", "doc_count": 4143}, + {"key": "none", "doc_count": 101}, + {"key": "dark", "doc_count": 79}, + {"key": "shadows_only", "doc_count": 5}, + ], + }, + "release_type": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + {"key": "article-journal", "doc_count": 4324}, + {"key": "article", "doc_count": 2}, + {"key": "_unknown", "doc_count": 1}, + {"key": "editorial", "doc_count": 1}, + ], + }, }, - 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0}, - '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0}, - 'took': 50 + "hits": {"total": 461939, "hits": [], "max_score": 0.0}, + "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0}, + "took": 50, } - es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request') + es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request") es_raw.side_effect = [ (200, {}, json.dumps(elastic_resp)), (200, {}, json.dumps(elastic_resp)), ] - rv = app.get('/container/issnl/1234-5678/stats.json') + rv = app.get("/container/issnl/1234-5678/stats.json") assert rv.status_code == 200 stats = rv.json - assert isinstance(stats['total'], int) - assert isinstance(stats['release_type'], dict) - assert isinstance(stats['preservation']['total'], int) - assert isinstance(stats['preservation']['bright'], int) - assert isinstance(stats['preservation']['dark'], int) - assert isinstance(stats['preservation']['none'], int) - - rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/stats.json') + assert isinstance(stats["total"], int) + assert isinstance(stats["release_type"], dict) + assert 
isinstance(stats["preservation"]["total"], int) + assert isinstance(stats["preservation"]["bright"], int) + assert isinstance(stats["preservation"]["dark"], int) + assert isinstance(stats["preservation"]["none"], int) + + rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/stats.json") assert rv.status_code == 200 stats = rv.json - assert isinstance(stats['total'], int) - assert stats['ident'] == "aaaaaaaaaaaaaeiraaaaaaaaam" + assert isinstance(stats["total"], int) + assert stats["ident"] == "aaaaaaaaaaaaaeiraaaaaaaaam"