diff options
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/api_annotations.py | 39 | ||||
-rw-r--r-- | python/tests/api_containers.py | 48 | ||||
-rw-r--r-- | python/tests/api_creators.py | 44 | ||||
-rw-r--r-- | python/tests/api_editgroups.py | 140 | ||||
-rw-r--r-- | python/tests/api_files.py | 52 | ||||
-rw-r--r-- | python/tests/api_filesets.py | 79 | ||||
-rw-r--r-- | python/tests/api_misc.py | 8 | ||||
-rw-r--r-- | python/tests/api_releases.py | 103 | ||||
-rw-r--r-- | python/tests/api_webcaptures.py | 96 | ||||
-rw-r--r-- | python/tests/citation_efficiency.py | 113 | ||||
-rwxr-xr-x | python/tests/cli.sh | 2 | ||||
-rw-r--r-- | python/tests/files/crossref-works.single.json | 2 | ||||
-rw-r--r-- | python/tests/fixtures.py | 30 | ||||
-rw-r--r-- | python/tests/import_crossref.py | 47 | ||||
-rw-r--r-- | python/tests/import_grobid_metadata.py | 30 | ||||
-rw-r--r-- | python/tests/import_issn.py | 26 | ||||
-rw-r--r-- | python/tests/import_journal_metadata.py | 39 | ||||
-rw-r--r-- | python/tests/import_matched.py | 28 | ||||
-rw-r--r-- | python/tests/import_orcid.py | 33 | ||||
-rw-r--r-- | python/tests/importer.py | 34 | ||||
-rw-r--r-- | python/tests/transform_tests.py | 2 |
21 files changed, 870 insertions, 125 deletions
diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py new file mode 100644 index 00000000..0d3c5046 --- /dev/null +++ b/python/tests/api_annotations.py @@ -0,0 +1,39 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_annotations(api): + + eg = quick_eg(api) + + # ensure no annotations on this object + a = api.get_editgroup_annotations(eg.editgroup_id) + assert a == [] + + # create an annotation! + api.create_editgroup_annotation( + eg.editgroup_id, + EditgroupAnnotation( + comment_markdown="some *annotation*", + extra=dict(thing="thang"))) + + # check that we can fetch it all sorts of ways + a = api.get_editgroup_annotations(eg.editgroup_id) + assert len(a) == 1 + assert a[0].extra['thing'] == "thang" + + # the editor persists, so this is a hack to find a "recent" one + a2 = api.get_editor_annotations(eg.editor_id, limit=100) + found = None + for thing in a2: + if thing.annotation_id == a[0].annotation_id: + found = thing + break + assert thing + assert thing.extra['thing'] == "thang" diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py new file mode 100644 index 00000000..674ae3b8 --- /dev/null +++ b/python/tests/api_containers.py @@ -0,0 +1,48 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_container(api): + eg = quick_eg(api) + + # all the fields! 
+ c1 = ContainerEntity( + name="some container name", + container_type="journal", + publisher="some container publisher", + issnl="1234-567X", + wikidata_qid="Q954248", + extra=dict(a=1, b=2), + ) + + c1edit = api.create_container(c1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + c2 = api.get_container(c1edit.ident) + + # check that fields match + assert c1.name == c2.name + assert c1.container_type == c2.container_type + assert c1.publisher == c2.publisher + assert c1.issnl == c2.issnl + assert c1.wikidata_qid == c2.wikidata_qid + assert c1.extra == c2.extra + + # expansion + # TODO: via release + # lookup + # TODO: via issnl; but need to generate random identifiers + +def test_container_examples(api): + + api.lookup_container(issnl='1549-1277') + + c1 = api.get_container('aaaaaaaaaaaaaeiraaaaaaaaam') + assert c1.name == "PLOS Medicine" + assert c1.issnl == "1549-1277" + diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py new file mode 100644 index 00000000..7443675b --- /dev/null +++ b/python/tests/api_creators.py @@ -0,0 +1,44 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_creators(api): + eg = quick_eg(api) + + # all the fields! 
+ c1 = CreatorEntity( + display_name="Emma Smith", + given_name="emma", + surname="smith", + orcid="0000-0002-1825-0097", + wikidata_qid="Q9542248", + extra=dict(a=1, b=5), + ) + + c1edit = api.create_creator(c1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + c2 = api.get_creator(c1edit.ident) + + # check that fields match + assert c1.display_name == c2.display_name + assert c1.given_name == c2.given_name + assert c1.surname == c2.surname + assert c1.orcid == c2.orcid + assert c1.wikidata_qid == c2.wikidata_qid + assert c1.extra == c2.extra + + # expansion + # TODO: via release + # lookup + # TODO: via issnl; but need to generate random identifiers + +def test_creators_examples(api): + # TODO: aaaaaaaaaaaaaircaaaaaaaaam + + api.lookup_creator(orcid='0000-0003-3118-6859') diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py new file mode 100644 index 00000000..722d8686 --- /dev/null +++ b/python/tests/api_editgroups.py @@ -0,0 +1,140 @@ + +import json +import pytest +import datetime +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_editgroup_submit(api): + # 1. check that edit group can be submitted/unsubmitted, and shows up in reviewable appropriately + # 2. 
accepted edits don't show up as reviewable and can't be submitted + + c1 = CreatorEntity(display_name="test updates") + eg = quick_eg(api) + c1 = api.get_creator(api.create_creator(c1, editgroup_id=eg.editgroup_id).ident) + + eg2 = api.get_editgroup(eg.editgroup_id) + assert not eg2.submitted + assert not eg2.changelog_index + + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + wip = api.get_editor_editgroups(eg.editor_id, limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in wip] + + api.update_editgroup(eg.editgroup_id, eg2, submit=True) + eg3 = api.get_editgroup(eg.editgroup_id) + assert eg3.submitted + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in reviewable] + + api.update_editgroup(eg.editgroup_id, eg2, submit=False) + eg3 = api.get_editgroup(eg.editgroup_id) + assert not eg3.submitted + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + + # put back in reviewable + api.update_editgroup(eg.editgroup_id, eg2, submit=True) + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id in [v.editgroup_id for v in reviewable] + + # shouldn't be reviewable if accepted + api.accept_editgroup(eg.editgroup_id) + reviewable = api.get_editgroups_reviewable(limit=100) + assert eg.editgroup_id not in [v.editgroup_id for v in reviewable] + eg3 = api.get_editgroup(eg.editgroup_id) + #print(eg3) + assert eg3.submitted + assert eg3.changelog_index + + with pytest.raises(fatcat_client.rest.ApiException): + api.update_editgroup(eg.editgroup_id, eg3, submit=True) + with pytest.raises(fatcat_client.rest.ApiException): + eg3.description = "something" + api.update_editgroup(eg.editgroup_id, eg3) + + +def test_editgroup_ordering(api): + + eg1 = quick_eg(api) + eg2 = quick_eg(api) + api.update_editgroup( + eg1.editgroup_id, + 
Editgroup(editgroup_id=eg1.editgroup_id, description="FAIL"), + submit=True) + api.update_editgroup( + eg2.editgroup_id, + Editgroup(editgroup_id=eg2.editgroup_id, description="FAIL"), + submit=True) + + r1 = api.get_editgroups_reviewable() + #print(r1) + assert not r1[0].description + assert not r1[1].description + assert r1[0].submitted >= r1[1].submitted + + # should be no editgroups "in the future" (since now + 1sec) + r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat()+"Z") + assert not r1 + + r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat()+"Z") + assert r1[0].submitted <= r1[1].submitted + + +def test_editgroup_autoaccept(api): + # autoaccept changes: editgroups required when, in what combination + + eg = quick_eg(api) + c1 = CreatorEntity(display_name="test autoaccept") + c2 = CreatorEntity(display_name="test another autoaccept") + + with pytest.raises(fatcat_client.rest.ApiException): + edits = api.create_creator_batch([c1, c2]) + + with pytest.raises(fatcat_client.rest.ApiException): + edits = api.create_creator_batch([c1, c2], editgroup_id=eg.editgroup_id, autoaccept=True) + + edits1 = api.create_creator_batch([c1, c2], editgroup_id=eg.editgroup_id) + edits2 = api.create_creator_batch([c1, c2], autoaccept=True) + + assert edits1[0].editgroup_id == eg.editgroup_id + assert edits1[0].editgroup_id != edits2[1].editgroup_id + eg1 = api.get_editgroup(edits1[0].editgroup_id) + eg2 = api.get_editgroup(edits2[0].editgroup_id) + + assert not eg1.changelog_index + assert eg2.changelog_index + #print(edits1) + #print(eg1.edits.creators) + assert eg1.edits.creators[0].ident in [t.ident for t in edits1] + assert eg2.edits.creators[0].ident in [t.ident for t in edits2] + + +def test_batch_params(api): + + eg = quick_eg(api) + c1 = CreatorEntity(display_name="test autoaccept") + c2 = CreatorEntity(display_name="test another autoaccept") + + 
with pytest.raises(fatcat_client.rest.ApiException): + edits = api.create_creator_batch([c1, c2]) + + desc = "test description" + extra = dict(a=75, q="thing") + edits = api.create_creator_batch([c1, c2], autoaccept=True, description=desc, extra=json.dumps(extra)) + eg = api.get_editgroup(edits[0].editgroup_id) + + assert eg.description == desc + assert eg.extra == extra + + # currently must manually json dumps() extra field + with pytest.raises(fatcat_client.rest.ApiException): + api.create_creator_batch([c1, c2], autoaccept=True, description=desc, extra=extra) + + with pytest.raises(fatcat_client.rest.ApiException): + api.create_creator_batch([c1, c2], autoaccept=True, description=desc, extra="{") diff --git a/python/tests/api_files.py b/python/tests/api_files.py new file mode 100644 index 00000000..033538ef --- /dev/null +++ b/python/tests/api_files.py @@ -0,0 +1,52 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_file(api): + + eg = quick_eg(api) + + # all the fields! 
+ f1 = FileEntity( + size=89238, + md5="7ce6615b2a5904939576d9567bd5f68e", + sha1="027e7ed3ea1a40e92dd2657a1e3c992b5dc45dd2", + sha256="f1f4f18a904e76818863ccbc6141fce92b0dcb47b0d6041aec98bc6806e393c3", + mimetype="application/pdf", + extra=dict(a=2, b=5), + urls=[ + FileEntityUrls(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"), + ], + release_ids=[], + ) + + f1edit = api.create_file(f1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + f2 = api.get_file(f1edit.ident) + + # check that fields match + assert f1.size == f2.size + assert f1.md5 == f2.md5 + assert f1.sha1 == f2.sha1 + assert f1.sha256 == f2.sha256 + assert f1.mimetype == f2.mimetype + assert f1.extra == f2.extra + assert f1.urls == f2.urls + assert f1.release_ids == f2.release_ids + + # expansion + # TODO: via release + # lookup + # TODO: via hashes; but need to generate random? + +def test_file_examples(api): + + api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362') + + f1 = api.get_file('aaaaaaaaaaaaamztaaaaaaaaam') diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py new file mode 100644 index 00000000..966b85ca --- /dev/null +++ b/python/tests/api_filesets.py @@ -0,0 +1,79 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_fileset(api): + + eg = quick_eg(api) + r1 = ReleaseEntity(title="test fileset release") + r1edit = api.create_release(r1, editgroup_id=eg.editgroup_id) + + fs1 = FilesetEntity( + manifest = [ + FilesetEntityManifest( + path="data/thing.tar.gz", + size=54321, + md5="540da3ea6e448d8dfb057c05225f853a", + sha1="1dab6a0e110f9b5d70b18db0abf051f7f93faf06", + sha256="c7b49f3e84cd1b7cb0b0e3e9f632b7be7e21b4dc229df23331f880a8a7dfa75a", + extra={"a": 1, "b": 3}, + ), + FilesetEntityManifest( + path="README.md", + size=54210, + 
md5="5f83592b5249671719bbed6ce91ecfa8", + sha1="455face3598611458efe1f072e58624790a67266", + sha256="429bcafa4d3d0072d5b2511e12c85c1aac1d304011d1c406da14707f7b9cd905", + extra={"x": 1, "y": "q"}, + ), + ], + urls = [ + FileEntityUrls(url="https://archive.org/download/fileset-123/", rel="repository"), + FileEntityUrls(url="https://humble-host.com/~user123/dataset/", rel="web"), + ], + release_ids = [r1edit.ident], + ) + + fs1edit = api.create_fileset(fs1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + fs2 = api.get_fileset(fs1edit.ident) + + # check that fields match + assert fs1.urls == fs2.urls + assert fs1.manifest == fs2.manifest + assert fs1.release_ids == fs2.release_ids + + # expansion + r1 = api.get_release(r1edit.ident, expand="filesets") + assert r1.filesets[0].manifest == fs1.manifest + +def test_fileset_examples(api): + fs3 = api.get_fileset('aaaaaaaaaaaaaztgaaaaaaaaam') + + assert fs3.urls[0].url == 'http://other-personal-blog.name/dataset/' + assert fs3.urls[1].rel == 'archive' + assert fs3.manifest[1].md5 == 'f4de91152c7ab9fdc2a128f962faebff' + assert fs3.manifest[1].extra['mimetype'] == 'application/gzip' + +def test_bad_fileset(api): + + eg = quick_eg(api) + + bad_list = [ + # good (for testing test itself) + #FilesetEntity(manifest=[FilesetEntityManifest(path="123.jpg", size=1234)]), + #FilesetEntity(urls=[FileEntityUrls(url="thing", rel="blah")]), + FilesetEntity(manifest=[FilesetEntityManifest(path="123.jpg", size="big")]), + FilesetEntity(release_ids=["asdf"]), + ] + + for b in bad_list: + with pytest.raises(fatcat_client.rest.ApiException): + api.create_fileset(b, editgroup_id=eg.editgroup_id) + diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py index 3510ea82..0a0f16da 100644 --- a/python/tests/api_misc.py +++ b/python/tests/api_misc.py @@ -8,14 +8,6 @@ from fatcat_client.rest import ApiException from fixtures import * -def test_lookups(api): - - api.lookup_creator(orcid='0000-0003-3118-6859') - 
api.lookup_container(issnl='1549-1277') - api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362') - api.lookup_release(pmid='54321') - api.lookup_release(isbn13='978-3-16-148410-0') - def test_lookup_hide_extend(api): r = api.lookup_release(doi='10.1371/journal.pmed.0020124') diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py new file mode 100644 index 00000000..ed6f24a4 --- /dev/null +++ b/python/tests/api_releases.py @@ -0,0 +1,103 @@ + +import json +import pytest +import datetime +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_release(api): + + eg = quick_eg(api) + + # all the fields! + r1 = ReleaseEntity( + title="some title", + original_title="оригинальное название", + release_type="post-weblog", + release_status="pre-print", + release_date=datetime.datetime.utcnow().date(), + release_year=2015, + doi="10.5555/12345678", + pmid="12345", + pmcid="PMC4321", + wikidata_qid="Q1234", + isbn13="978-3-16-148410-0", + core_id="187348", + arxiv_id="aslkdjfh", + jstor_id="8328424", + volume="84", + issue="XII", + pages="4-99", + publisher="some publisher", + language="en", + license_slug="CC-0", + extra=dict(a=1, b=2), + contribs=[], + refs=[], + abstracts=[ + ReleaseEntityAbstracts( + content="this is some abstract", + mimetype="text/plain", + lang="en"), + ReleaseEntityAbstracts( + content="this is some other abstract", + mimetype="text/plain", + lang="de"), + ], + ) + + r1edit = api.create_release(r1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + r2 = api.get_release(r1edit.ident) + + # check that fields match + assert r1.title == r2.title + assert r1.original_title == r2.original_title + assert r1.release_type == r2.release_type + assert r1.release_date == r2.release_date + assert r1.release_year == r2.release_year + assert r1.doi == r2.doi + assert r1.pmid == r2.pmid + assert r1.pmcid == r2.pmcid 
+ assert r1.wikidata_qid == r2.wikidata_qid + assert r1.isbn13 == r2.isbn13 + assert r1.core_id == r2.core_id + assert r1.arxiv_id == r2.arxiv_id + assert r1.jstor_id == r2.jstor_id + assert r1.volume == r2.volume + assert r1.issue == r2.issue + assert r1.pages == r2.pages + assert r1.publisher == r2.publisher + assert r1.language == r2.language + assert r1.license_slug == r2.license_slug + assert r1.extra == r2.extra + + for i in range(len(r1.abstracts)): + r1.abstracts[i].content == r2.abstracts[i].content + r1.abstracts[i].mimetype == r2.abstracts[i].mimetype + r1.abstracts[i].lang == r2.abstracts[i].lang + for i in range(len(r1.contribs)): + r1.contribs[i] == r2.contribs[i] + for i in range(len(r1.refs)): + r1.refs[i] == r2.refs[i] + + # expansion + # TODO: via work + # lookup + # TODO: via all; but need to generate random identifiers + +def test_release_examples(api): + + api.lookup_release(pmid='54321') + api.lookup_release(isbn13='978-3-16-148410-0') + + r1 = api.get_release('aaaaaaaaaaaaarceaaaaaaaaai') + assert r1.title == "bigger example" + assert len(r1.refs) == 5 + assert r1.contribs[0].role == "editor" + assert r1.abstracts[0].mimetype == "application/xml+jats" + diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py new file mode 100644 index 00000000..dc1754b3 --- /dev/null +++ b/python/tests/api_webcaptures.py @@ -0,0 +1,96 @@ + +import json +import pytest +import datetime +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_webcapture(api): + + eg = quick_eg(api) + r1 = ReleaseEntity(title="test webcapture release") + r1edit = api.create_release(r1, editgroup_id=eg.editgroup_id) + + wc1 = WebcaptureEntity( + original_url = "http://example.site", + #timestamp = "2012-01-02T03:04:05Z", + timestamp = datetime.datetime.now(datetime.timezone.utc), + cdx = [ + WebcaptureEntityCdx( + surt="site,example,)/data/thing.tar.gz", + 
#timestamp="2012-01-02T03:04:05Z", + timestamp=datetime.datetime.now(datetime.timezone.utc), + url="http://example.site/data/thing.tar.gz", + mimetype="application/gzip", + status_code=200, + sha1="455face3598611458efe1f072e58624790a67266", + sha256="c7b49f3e84cd1b7cb0b0e3e9f632b7be7e21b4dc229df23331f880a8a7dfa75a", + ), + WebcaptureEntityCdx( + surt="site,example,)/README.md", + #timestamp="2012-01-02T03:04:05Z", + timestamp=datetime.datetime.now(datetime.timezone.utc), + url="http://example.site/README.md", + mimetype="text/markdown", + status_code=200, + sha1="455face3598611458efe1f072e58624790a67266", + sha256="429bcafa4d3d0072d5b2511e12c85c1aac1d304011d1c406da14707f7b9cd905", + ), + ], + archive_urls = [ + FileEntityUrls(rel="wayback", url="https://web.archive.org/web/"), + ], + release_ids = [r1edit.ident], + ) + + wc1edit = api.create_webcapture(wc1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + wc2 = api.get_webcapture(wc1edit.ident) + + # check that fields match + # I don't know why these aren't equal... 
+ #print(wc1.archive_urls) + #print(wc2.archive_urls) + #assert wc1.archive_urls == wc2.archive_urls + assert wc1.archive_urls[0].rel == wc2.archive_urls[0].rel + assert wc1.archive_urls[0].url == wc2.archive_urls[0].url + assert wc1.cdx == wc2.cdx + assert wc1.release_ids == wc2.release_ids + assert wc1.timestamp == wc2.timestamp + assert wc1.original_url == wc2.original_url + + # TODO: check release expansion + r1 = api.get_release(r1edit.ident, expand="webcaptures") + print(r1) + assert r1.webcaptures[0].cdx == wc1.cdx + +def test_webcapture_examples(api): + wc3 = api.get_webcapture('aaaaaaaaaaaaa53xaaaaaaaaam') + + assert wc3.cdx[0].surt == 'org,asheesh)/' + assert wc3.cdx[1].sha1 == 'a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5' + assert wc3.archive_urls[1].rel == 'warc' + + +def test_bad_webcapture(api): + + eg = quick_eg(api) + + bad_list = [ + # good (for testing test itself) + WebcaptureEntity(cdx=[ + WebcaptureEntityCdx( + surt="site,example,)/123.jpg", + url="http://example.site/123.jpg", + sha1="455face3598611458efe1f072e58624790a67266", + timestamp=201506071122)]), + ] + + for b in bad_list: + with pytest.raises(fatcat_client.rest.ApiException): + api.create_webcapture(b, editgroup_id=eg.editgroup_id) + diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py new file mode 100644 index 00000000..fe5006cc --- /dev/null +++ b/python/tests/citation_efficiency.py @@ -0,0 +1,113 @@ + +import json +import pytest +from copy import copy + +from fatcat_client import * +from fatcat_client.rest import ApiException +from fixtures import * + + +def test_citation_indexing(api): + # indexing is consistent and reacts to change + + eg = quick_eg(api) + r1 = ReleaseEntity(title="the target") + r1.refs = [ + ReleaseRef(key="first", title="the first title"), + ReleaseRef(key="second", title="the second title"), + ReleaseRef(key="third", title="a third title"), + ] + r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident) 
+ api.accept_editgroup(eg.editgroup_id) + + assert r1.refs[0].index == 0 + assert r1.refs[0].key == "first" + assert r1.refs[1].index == 1 + assert r1.refs[1].key == "second" + assert r1.refs[2].index == 2 + assert r1.refs[2].key == "third" + + r1.refs.pop(1) + eg = quick_eg(api) + api.update_release(r1.ident, r1, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + r1 = api.get_release(r1.ident) + + assert r1.refs[0].index == 0 + assert r1.refs[0].key == "first" + assert r1.refs[1].index == 1 + assert r1.refs[1].key == "third" + +def test_citation_targets(api): + # invariant to linking citations + # also, updates work + + eg = quick_eg(api) + r1 = ReleaseEntity(title="the target") + r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident) + r2 = ReleaseEntity(title="the citer") + r2.refs = [ + ReleaseRef(key="first", title="something else"), + ReleaseRef(key="second", title="the target title"), + ] + r2 = api.get_release(api.create_release(r2, editgroup_id=eg.editgroup_id).ident) + api.accept_editgroup(eg.editgroup_id) + + eg = quick_eg(api) + r2.refs[1].target_release_id = r1.ident + api.update_release(r2.ident, r2, editgroup_id=eg.editgroup_id) + api.accept_editgroup(eg.editgroup_id) + r2 = api.get_release(r2.ident) + assert r2.refs[0].key == "first" + assert r2.refs[1].key == "second" + assert r2.refs[0].index == 0 # TODO: one-indexing? 
+ assert r2.refs[1].index == 1 + assert r2.refs[0].target_release_id == None + assert r2.refs[1].target_release_id == r1.ident + assert len(r2.refs) == 2 + +def test_citation_empty_array(api): + # distinction between empty array (no citations) and no array (hidden) + + r1 = ReleaseEntity(title="citation null") + r2 = ReleaseEntity(title="citation empty array") + r1.refs = None + r2.refs = [] + + eg = quick_eg(api) + r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident) + r2 = api.get_release(api.create_release(r2, editgroup_id=eg.editgroup_id).ident) + api.accept_editgroup(eg.editgroup_id) + + print(r1.refs) + print(r2.refs) + assert r1.refs == [] + assert r1.refs == r2.refs + + r1b = api.get_release(r1.ident, hide="refs") + assert r1b.refs == None + +def test_citation_encoding(api): + # escape-only changes (eg, \u1234 whatever for ASCII) + + r1 = ReleaseEntity(title="citation encoding") + title = "title-unicode \\u0050 \\\" " + container = "container-unicode ☃︎ ä ö ü スティー" + extra = extra={'a': 1, 'b': 2, 'ö': 3} + locator = "p123" + r1.refs = [ + ReleaseRef(key="1", year=1923, title=title, container_name=container, + extra=extra, locator=locator), + ReleaseRef(key="2"), + ] + + eg = quick_eg(api) + r1 = api.get_release(api.create_release(r1, editgroup_id=eg.editgroup_id).ident) + api.accept_editgroup(eg.editgroup_id) + + assert title == r1.refs[0].title + assert container == r1.refs[0].container_name + assert extra == r1.refs[0].extra + assert locator == r1.refs[0].locator + diff --git a/python/tests/cli.sh b/python/tests/cli.sh index eba6d3a7..19d8a85b 100755 --- a/python/tests/cli.sh +++ b/python/tests/cli.sh @@ -14,7 +14,7 @@ set -x ./fatcat_import.py crossref tests/files/crossref-works.2018-01-21.badsample.json tests/files/ISSN-to-ISSN-L.snip.txt ./fatcat_import.py orcid tests/files/0000-0001-8254-7103.json -./fatcat_import.py issn tests/files/journal_extra_metadata.snip.csv +./fatcat_import.py journal-metadata 
tests/files/journal_extra_metadata.snip.csv ./fatcat_import.py matched tests/files/matched_sample.json ./fatcat_import.py matched tests/files/example_matched.json ./fatcat_import.py grobid-metadata tests/files/example_grobid_metadata_lines.tsv diff --git a/python/tests/files/crossref-works.single.json b/python/tests/files/crossref-works.single.json index 2af2b358..e3d2e05c 100644 --- a/python/tests/files/crossref-works.single.json +++ b/python/tests/files/crossref-works.single.json @@ -84,7 +84,7 @@ { "given": "Carlos G.", "family": "Diaz", - "affiliation": ["Some University"] + "affiliation": [{"name": "Some University"}, {"name": "Some Department"}] }, { "given": "Francisco M.", diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index 6a880c48..3cc275b3 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -28,6 +28,7 @@ def api(): conf.api_key["Authorization"] = os.getenv("FATCAT_API_AUTH_TOKEN") conf.api_key_prefix["Authorization"] = "Bearer" api_client = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf)) + api_client.editor_id = "aaaaaaaaaaaabkvkaaaaaaaaae" return api_client def test_get_changelog_entry(api): @@ -38,33 +39,6 @@ def test_get_changelog_entry(api): ## Helpers ################################################################## def quick_eg(api_inst): - eg = api_inst.create_editgroup( - fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) + eg = api_inst.create_editgroup(fatcat_client.Editgroup()) return eg -# TODO: what are these even here for? 
-def check_entity_fields(e): - for key in ('rev', 'is_live', 'redirect_id'): - assert key in e - for key in ('id',): - assert e[key] is not None - -def check_release(e): - for key in ('work', 'release_type'): - assert key in e - for key in ('title', ): - assert e[key] is not None - for key in ('refs', 'creators'): - assert type(e[key]) == list - -def check_creator(e): - for key in ('name',): - assert e[key] is not None - -def check_container(e): - for key in ('name',): - assert e[key] is not None - -def check_file(e): - for key in ('size', 'sha1'): - assert e[key] is not None diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index e2ca6122..193f78f6 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -1,35 +1,51 @@ import json import pytest -from fatcat_tools.importers import CrossrefImporter +from fatcat_tools.importers import CrossrefImporter, JsonLinePusher from fixtures import api @pytest.fixture(scope="function") def crossref_importer(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True) @pytest.fixture(scope="function") def crossref_importer_existing(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False) def test_crossref_importer_batch(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_batch(f) + JsonLinePusher(crossref_importer, f).run() def test_crossref_importer(crossref_importer): + last_index = 
crossref_importer.api.get_changelog(limit=1)[0].index with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_source(f) + crossref_importer.bezerk_mode = True + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 14 + assert counts['exists'] == 0 + assert counts['skip'] == 0 + # fetch most recent editgroup - changes = crossref_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = crossref_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "crossref" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.CrossrefImporter" in eg.extra['agent'] + last_index = crossref_importer.api.get_changelog(limit=1)[0].index + with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + crossref_importer.bezerk_mode = False + crossref_importer.reset() + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 14 + assert counts['skip'] == 0 + assert last_index == crossref_importer.api.get_changelog(limit=1)[0].index + def test_crossref_mappings(crossref_importer): assert crossref_importer.map_release_type('journal-article') == "article-journal" assert crossref_importer.map_release_type('asdf') is None @@ -39,13 +55,13 @@ def test_crossref_mappings(crossref_importer): def test_crossref_importer_create(crossref_importer): crossref_importer.create_containers = True with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_source(f) + JsonLinePusher(crossref_importer, f).run() def test_crossref_dict_parse(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) + r = crossref_importer.parse_record(raw) extra = r.extra['crossref'] assert r.title == "Renormalized 
perturbation theory by the moment method for degenerate states: Anharmonic oscillators" assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t" @@ -61,7 +77,8 @@ def test_crossref_dict_parse(crossref_importer): assert len(r.contribs) == 5 assert r.contribs[0].raw_name == "Marcelo D. Radicioni" assert r.contribs[0].index == 0 - assert r.contribs[1].extra['affiliations'] == ["Some University"] + assert r.contribs[1].raw_affiliation == "Some University" + assert r.contribs[1].extra['more_affiliations'] == ["Some Department"] assert r.contribs[1].role == "author" assert r.contribs[3].role == "editor" assert r.contribs[3].index is None @@ -78,8 +95,10 @@ def test_crossref_dict_parse(crossref_importer): def test_stateful_checking(crossref_importer_existing): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line, a whole document - raw = json.loads(f.read()) + raw = f.read() # might not exist yet... - crossref_importer_existing.process_source([json.dumps(raw)]) - # ok, make sure we get 'None' back - assert crossref_importer_existing.parse_crossref_dict(raw) is None + crossref_importer_existing.push_record(json.loads(raw)) + crossref_importer_existing.finish() + # make sure we wouldn't insert again + entity = crossref_importer_existing.parse_record(json.loads(raw)) + assert crossref_importer_existing.try_update(entity) is False diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 97ebcaef..4fed4aaa 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -3,7 +3,7 @@ import os import json import base64 import pytest -from fatcat_tools.importers import GrobidMetadataImporter +from fatcat_tools.importers import GrobidMetadataImporter, LinePusher from fixtures import api """ @@ -15,10 +15,6 @@ side-effects. Should probably be disabled or re-written. 
def grobid_metadata_importer(api): yield GrobidMetadataImporter(api) -# TODO: use API to check that entities actually created... -#def test_grobid_metadata_importer_batch(grobid_metadata_importer): -# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: -# grobid_metadata_importer.process_batch(f) def test_grobid_metadata_parse(grobid_metadata_importer): with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: @@ -30,7 +26,8 @@ def test_grobid_metadata_parse(grobid_metadata_importer): print(re.contribs) assert re.contribs[0].raw_name == "Wahyu Ary" assert re.publisher == None - assert re.extra.get('container_name') == None + if re.extra: + assert re.extra.get('container_name') == None assert len(re.refs) == 27 def test_file_metadata_parse(grobid_metadata_importer): @@ -53,13 +50,28 @@ def test_file_metadata_parse(grobid_metadata_importer): assert len(fe.release_ids) == 0 def test_grobid_metadata_importer(grobid_metadata_importer): + last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - grobid_metadata_importer.process_source(f) + grobid_metadata_importer.bezerk_mode = True + counts = LinePusher(grobid_metadata_importer, f).run() + assert counts['insert'] == 10 + assert counts['inserted.release'] == 10 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup - changes = grobid_metadata_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "grobid" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent'] + + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + grobid_metadata_importer.reset() + grobid_metadata_importer.bezerk_mode = False + counts = LinePusher(grobid_metadata_importer, f).run() 
+ assert counts['insert'] == 0 + assert counts['inserted.release'] == 0 + assert counts['exists'] == 10 + assert counts['skip'] == 0 diff --git a/python/tests/import_issn.py b/python/tests/import_issn.py deleted file mode 100644 index 6b5978d9..00000000 --- a/python/tests/import_issn.py +++ /dev/null @@ -1,26 +0,0 @@ - -import pytest -from fatcat_tools.importers import IssnImporter -from fixtures import api - - -@pytest.fixture(scope="function") -def issn_importer(api): - yield IssnImporter(api) - -# TODO: use API to check that entities actually created... -def test_issn_importer_batch(issn_importer): - with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: - issn_importer.process_csv_batch(f) - -def test_issn_importer(issn_importer): - with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: - issn_importer.process_csv_source(f) - - # fetch most recent editgroup - changes = issn_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup - assert eg.description - assert "container" in eg.description.lower() - assert eg.extra['git_rev'] - assert "fatcat_tools.IssnImporter" in eg.extra['agent'] diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py new file mode 100644 index 00000000..a2b10a65 --- /dev/null +++ b/python/tests/import_journal_metadata.py @@ -0,0 +1,39 @@ + +import pytest +from fatcat_tools.importers import JournalMetadataImporter, CsvPusher +from fixtures import api + + +@pytest.fixture(scope="function") +def journal_metadata_importer(api): + yield JournalMetadataImporter(api) + +# TODO: use API to check that entities actually created... 
+def test_journal_metadata_importer_batch(journal_metadata_importer): + with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: + CsvPusher(journal_metadata_importer, f).run() + +def test_journal_metadata_importer(journal_metadata_importer): + last_index = journal_metadata_importer.api.get_changelog(limit=1)[0].index + with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: + journal_metadata_importer.bezerk_mode = True + counts = CsvPusher(journal_metadata_importer, f).run() + assert counts['insert'] == 9 + assert counts['exists'] == 0 + assert counts['skip'] == 0 + + # fetch most recent editgroup + change = journal_metadata_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup + assert eg.description + assert "container" in eg.description.lower() + assert eg.extra['git_rev'] + assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent'] + + with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: + journal_metadata_importer.reset() + journal_metadata_importer.bezerk_mode = False + counts = CsvPusher(journal_metadata_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 9 + assert counts['skip'] == 0 diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index 080674ac..8f694456 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.importers import MatchedImporter +from fatcat_tools.importers import MatchedImporter, JsonLinePusher from fixtures import api @@ -10,26 +10,40 @@ def matched_importer(api): yield MatchedImporter(api) # TODO: use API to check that entities actually created... 
-def test_matched_importer_batch(matched_importer): +def test_matched_importer_basic(matched_importer): with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_batch(f) + JsonLinePusher(matched_importer, f).run() def test_matched_importer(matched_importer): + last_index = matched_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_source(f) + matched_importer.bezerk_mode = True + counts = JsonLinePusher(matched_importer, f).run() + assert counts['insert'] == 2 + assert counts['exists'] == 0 + assert counts['skip'] == 11 # fetch most recent editgroup - changes = matched_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = matched_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "file-to-release" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.MatchedImporter" in eg.extra['agent'] + # re-insert; should skip + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.reset() + matched_importer.bezerk_mode = False + counts = JsonLinePusher(matched_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 2 + assert counts['skip'] == 11 + def test_matched_dict_parse(matched_importer): with open('tests/files/example_matched.json', 'r') as f: raw = json.loads(f.readline()) - f = matched_importer.parse_matched_dict(raw) + f = matched_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" assert f.mimetype == "application/pdf" diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 717a1328..57886b52 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.importers import OrcidImporter +from fatcat_tools.importers import OrcidImporter, JsonLinePusher
from fixtures import api @@ -9,37 +9,46 @@ from fixtures import api def orcid_importer(api): yield OrcidImporter(api) -# TODO: use API to check that entities actually created... -def test_orcid_importer_batch(orcid_importer): - with open('tests/files/0000-0001-8254-7103.json', 'r') as f: - orcid_importer.process_batch(f) - def test_orcid_importer_badid(orcid_importer): with open('tests/files/0000-0001-8254-710X.json', 'r') as f: - orcid_importer.process_batch(f) + JsonLinePusher(orcid_importer, f).run() +# TODO: use API to check that entities actually created... def test_orcid_importer(orcid_importer): + last_index = orcid_importer.api.get_changelog(limit=1)[0].index with open('tests/files/0000-0001-8254-7103.json', 'r') as f: - orcid_importer.process_source(f) + orcid_importer.bezerk_mode = True + counts = JsonLinePusher(orcid_importer, f).run() + assert counts['insert'] == 1 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup - changes = orcid_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = orcid_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "orcid" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.OrcidImporter" in eg.extra['agent'] + with open('tests/files/0000-0001-8254-7103.json', 'r') as f: + orcid_importer.reset() + orcid_importer.bezerk_mode = False + counts = JsonLinePusher(orcid_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 1 + assert counts['skip'] == 0 + def test_orcid_importer_x(orcid_importer): with open('tests/files/0000-0003-3953-765X.json', 'r') as f: - orcid_importer.process_source(f) + JsonLinePusher(orcid_importer, f).run() c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X") assert c is not None def test_orcid_dict_parse(orcid_importer): with open('tests/files/0000-0001-8254-7103.json', 'r') as f: raw = json.loads(f.readline()) - c = 
orcid_importer.parse_orcid_dict(raw) + c = orcid_importer.parse_record(raw) assert c.given_name == "Man-Hui" assert c.surname == "Li" assert c.display_name == "Man-Hui Li" diff --git a/python/tests/importer.py b/python/tests/importer.py index 34efa5d8..9308ba84 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,13 +1,13 @@ import pytest -from fatcat_tools.importers import FatcatImporter +from fatcat_tools.importers import CrossrefImporter, OrcidImporter from fixtures import api def test_issnl_mapping_lookup(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - fi = FatcatImporter(api, issn_map_file=issn_file) + fi = CrossrefImporter(api, issn_map_file=issn_file) assert fi.issn2issnl('0000-0027') == '0002-0027' assert fi.issn2issnl('0002-0027') == '0002-0027' @@ -18,20 +18,18 @@ def test_issnl_mapping_lookup(api): def test_identifiers(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - fi = FatcatImporter(api, issn_map_file=issn_file) - - assert fi.is_issnl("1234-5678") == True - assert fi.is_issnl("1234-5678.") == False - assert fi.is_issnl("12345678") == False - assert fi.is_issnl("1-2345678") == False - - assert fi.is_doi("10.1234/56789") == True - assert fi.is_doi("101234/56789") == False - assert fi.is_doi("10.1234_56789") == False - - assert fi.is_orcid("0000-0003-3118-6591") == True - assert fi.is_orcid("0000-0003-3953-765X") == True - assert fi.is_orcid("0000-00x3-3118-659") == False - assert fi.is_orcid("0000-00033118-659") == False - assert fi.is_orcid("0000-0003-3118-659.") == False + ci = CrossrefImporter(api, issn_map_file=issn_file) + + assert ci.is_issnl("1234-5678") == True + assert ci.is_issnl("1234-5678.") == False + assert ci.is_issnl("12345678") == False + assert ci.is_issnl("1-2345678") == False + + oi = OrcidImporter(api) + + assert oi.is_orcid("0000-0003-3118-6591") == True + assert oi.is_orcid("0000-0003-3953-765X") == True + assert oi.is_orcid("0000-00x3-3118-659") == 
False + assert oi.is_orcid("0000-00033118-659") == False + assert oi.is_orcid("0000-0003-3118-659.") == False diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py index e9d23250..6d6c6c82 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_tests.py @@ -11,7 +11,7 @@ def test_elasticsearch_convert(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) + r = crossref_importer.parse_record(raw) r.state = 'active' release_to_elasticsearch(r) |