author     Bryan Newbold <bnewbold@robocracy.org>  2021-11-02 18:13:14 -0700
committer  Bryan Newbold <bnewbold@robocracy.org>  2021-11-02 18:13:14 -0700
commit     cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch)
tree       5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests
parent     78f08280edea4ff65ca613ad30005c45cc48dea6 (diff)
download   fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz
           fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip
fmt (black): tests/
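This change set is mechanical: it is the output of running the black code formatter over the test suite. A minimal sketch of reproducing a pass like this (an assumption, not the author's exact command, and assuming black is installed with its default settings, including the standard 88-character line length) could be:

```python
# Sketch only: a plausible way to reproduce a black formatting pass over the
# test suite and inspect the result; not the exact command from this commit.
import subprocess

# Reformat python/tests/ in place with black's defaults (88-char lines,
# double-quoted strings), then summarize what changed.
subprocess.run(["black", "python/tests/"], check=True)
subprocess.run(["git", "diff", "--stat", "python/tests/"], check=True)
```

The diffstat below is consistent with that kind of run: every touched file is under python/tests/, and the edits are quoting, trailing-comma, and whitespace changes rather than logic changes.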
Diffstat (limited to 'python/tests')
-rw-r--r--  python/tests/api_annotations.py | 10
-rw-r--r--  python/tests/api_containers.py | 5
-rw-r--r--  python/tests/api_creators.py | 5
-rw-r--r--  python/tests/api_editgroups.py | 35
-rw-r--r--  python/tests/api_editor.py | 3
-rw-r--r--  python/tests/api_entity_editing.py | 14
-rw-r--r--  python/tests/api_entity_state.py | 34
-rw-r--r--  python/tests/api_files.py | 13
-rw-r--r--  python/tests/api_filesets.py | 29
-rw-r--r--  python/tests/api_misc.py | 19
-rw-r--r--  python/tests/api_releases.py | 69
-rw-r--r--  python/tests/api_webcaptures.py | 126
-rw-r--r--  python/tests/citation_efficiency.py | 20
-rw-r--r--  python/tests/clean_files.py | 43
-rw-r--r--  python/tests/fixtures.py | 75
-rw-r--r--  python/tests/harvest_crossref.py | 12
-rw-r--r--  python/tests/harvest_datacite.py | 15
-rw-r--r--  python/tests/harvest_pubmed.py | 31
-rw-r--r--  python/tests/harvest_state.py | 13
-rw-r--r--  python/tests/import_arabesque.py | 41
-rw-r--r--  python/tests/import_arxiv.py | 51
-rw-r--r--  python/tests/import_common.py | 9
-rw-r--r--  python/tests/import_crossref.py | 123
-rw-r--r--  python/tests/import_datacite.py | 74
-rw-r--r--  python/tests/import_dblp.py | 89
-rw-r--r--  python/tests/import_doaj.py | 19
-rw-r--r--  python/tests/import_file_generic.py | 98
-rw-r--r--  python/tests/import_file_meta.py | 47
-rw-r--r--  python/tests/import_fileset_generic.py | 37
-rw-r--r--  python/tests/import_grobid_metadata.py | 52
-rw-r--r--  python/tests/import_ingest.py | 93
-rw-r--r--  python/tests/import_jalc.py | 77
-rw-r--r--  python/tests/import_journal_metadata.py | 27
-rw-r--r--  python/tests/import_jstor.py | 49
-rw-r--r--  python/tests/import_matched.py | 30
-rw-r--r--  python/tests/import_orcid.py | 33
-rw-r--r--  python/tests/import_pubmed.py | 89
-rw-r--r--  python/tests/import_shadow.py | 34
-rw-r--r--  python/tests/importer.py | 14
-rw-r--r--  python/tests/subentity_state.py | 76
-rw-r--r--  python/tests/tools_api.py | 2
-rw-r--r--  python/tests/transform_csl.py | 39
-rw-r--r--  python/tests/transform_elasticsearch.py | 341
-rw-r--r--  python/tests/transform_ingest.py | 18
-rw-r--r--  python/tests/transform_toml.py | 7
-rw-r--r--  python/tests/web_auth.py | 64
-rw-r--r--  python/tests/web_citation_csl.py | 53
-rw-r--r--  python/tests/web_coverage.py | 224
-rw-r--r--  python/tests/web_editgroup.py | 59
-rw-r--r--  python/tests/web_editing.py | 254
-rw-r--r--  python/tests/web_editor.py | 33
-rw-r--r--  python/tests/web_entity_views.py | 259
-rw-r--r--  python/tests/web_refs.py | 21
-rw-r--r--  python/tests/web_routes.py | 4
-rw-r--r--  python/tests/web_search.py | 171
55 files changed, 1852 insertions, 1430 deletions
diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py
index 79acaa4b..ac898fa2 100644
--- a/python/tests/api_annotations.py
+++ b/python/tests/api_annotations.py
@@ -1,4 +1,3 @@
-
from fatcat_openapi_client import *
from fixtures import *
@@ -14,14 +13,13 @@ def test_annotations(api):
# create an annotation!
api.create_editgroup_annotation(
eg.editgroup_id,
- EditgroupAnnotation(
- comment_markdown="some *annotation*",
- extra=dict(thing="thang")))
+ EditgroupAnnotation(comment_markdown="some *annotation*", extra=dict(thing="thang")),
+ )
# check that we can fetch it all sorts of ways
a = api.get_editgroup_annotations(eg.editgroup_id)
assert len(a) == 1
- assert a[0].extra['thing'] == "thang"
+ assert a[0].extra["thing"] == "thang"
# the editor persists, so this is a hack to find a "recent" one
a2 = api.get_editor_annotations(eg.editor_id, limit=100)
@@ -31,4 +29,4 @@ def test_annotations(api):
found = thing
break
assert found
- assert found.extra['thing'] == "thang"
+ assert found.extra["thing"] == "thang"
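The pattern in this first file repeats through the rest of the diff: single-quoted strings become double-quoted, long call expressions are split across lines with trailing commas, and comments gain normalized spacing. As a hedged illustration, black's Python API (assuming `black.format_str` and `black.Mode` behave as in current releases) shows the quote normalization on a fragment like the one above:

```python
# Illustration only: apply black programmatically to a one-line fragment
# resembling the pre-formatting code in this file.
import black

src = "assert a[0].extra['thing'] == \"thang\"\n"
print(black.format_str(src, mode=black.Mode()), end="")
# Expected output: assert a[0].extra["thing"] == "thang"
```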
diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py
index 0f6c85b1..d6fd421a 100644
--- a/python/tests/api_containers.py
+++ b/python/tests/api_containers.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client import *
from fixtures import *
@@ -63,6 +62,7 @@ def test_container(api):
c2 = api.get_container(c2.ident)
assert c2.state == "deleted"
+
def test_container_bad_idents(api):
# all the fields!
@@ -81,9 +81,10 @@ def test_container_bad_idents(api):
with pytest.raises(ValueError):
c1.issne = "asdf-hhhh"
+
def test_container_examples(api):
- c1 = api.get_container('aaaaaaaaaaaaaeiraaaaaaaaam')
+ c1 = api.get_container("aaaaaaaaaaaaaeiraaaaaaaaam")
assert c1.name == "PLOS Medicine"
assert c1.issnl == "1549-1277"
assert c1.issne == "1549-1676"
diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py
index b271e2b3..fe2bd91b 100644
--- a/python/tests/api_creators.py
+++ b/python/tests/api_creators.py
@@ -1,4 +1,3 @@
-
from fatcat_openapi_client import *
from fixtures import *
@@ -56,10 +55,12 @@ def test_creators(api):
c2 = api.get_creator(c2.ident)
assert c2.state == "deleted"
+
# TODO: test expansion of new creator/release pair (release get)?
+
def test_creators_examples(api):
# ident: aaaaaaaaaaaaaircaaaaaaaaam
- c1 = api.lookup_creator(orcid='0000-0003-3118-6859')
+ c1 = api.lookup_creator(orcid="0000-0003-3118-6859")
assert c1.ident == "aaaaaaaaaaaaaircaaaaaaaaam"
diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py
index 9f870fb1..f77f2202 100644
--- a/python/tests/api_editgroups.py
+++ b/python/tests/api_editgroups.py
@@ -1,4 +1,3 @@
-
import datetime
import pytest
@@ -45,7 +44,7 @@ def test_editgroup_submit(api):
reviewable = api.get_editgroups_reviewable(limit=100)
assert eg.editgroup_id not in [v.editgroup_id for v in reviewable]
eg3 = api.get_editgroup(eg.editgroup_id)
- #print(eg3)
+ # print(eg3)
assert eg3.submitted
assert eg3.changelog_index
@@ -63,23 +62,29 @@ def test_editgroup_ordering(api):
api.update_editgroup(
eg1.editgroup_id,
Editgroup(editgroup_id=eg1.editgroup_id, description="FAIL"),
- submit=True)
+ submit=True,
+ )
api.update_editgroup(
eg2.editgroup_id,
Editgroup(editgroup_id=eg2.editgroup_id, description="FAIL"),
- submit=True)
+ submit=True,
+ )
r1 = api.get_editgroups_reviewable()
- #print(r1)
+ # print(r1)
assert not r1[0].description
assert not r1[1].description
assert r1[0].submitted >= r1[1].submitted
# should be no editgroups "in the future" (since now + 1sec)
- r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat()+"Z")
+ r1 = api.get_editgroups_reviewable(
+ since=(datetime.datetime.utcnow() + datetime.timedelta(seconds=1)).isoformat() + "Z"
+ )
assert not r1
- r1 = api.get_editgroups_reviewable(since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat()+"Z")
+ r1 = api.get_editgroups_reviewable(
+ since=(datetime.datetime.utcnow() - datetime.timedelta(seconds=5)).isoformat() + "Z"
+ )
assert r1[0].submitted <= r1[1].submitted
@@ -88,9 +93,9 @@ def test_editgroup_auto_batch(api):
c1 = CreatorEntity(display_name="test auto_batch")
c2 = CreatorEntity(display_name="test another auto_batch")
- eg1 = api.create_creator_auto_batch(CreatorAutoBatch(
- editgroup=Editgroup(),
- entity_list=[c1, c2]))
+ eg1 = api.create_creator_auto_batch(
+ CreatorAutoBatch(editgroup=Editgroup(), entity_list=[c1, c2])
+ )
assert eg1.changelog_index
assert len(eg1.edits.creators) == 2
@@ -103,11 +108,11 @@ def test_batch_params(api):
desc = "test description"
extra = dict(a=75, q="thing")
- eg1 = api.create_creator_auto_batch(CreatorAutoBatch(
- editgroup=Editgroup(
- description=desc,
- extra=extra),
- entity_list=[c1, c2]))
+ eg1 = api.create_creator_auto_batch(
+ CreatorAutoBatch(
+ editgroup=Editgroup(description=desc, extra=extra), entity_list=[c1, c2]
+ )
+ )
assert eg1.description == desc
assert eg1.extra == extra
diff --git a/python/tests/api_editor.py b/python/tests/api_editor.py
index 0c680a2f..ef70b42c 100644
--- a/python/tests/api_editor.py
+++ b/python/tests/api_editor.py
@@ -1,4 +1,3 @@
-
import fatcat_openapi_client
import pytest
from fixtures import api
@@ -18,11 +17,13 @@ def test_editor_update(api):
check = api.get_editor(editor_id)
assert check == orig
+
def test_editor_get(api):
editor_id = api.editor_id
api.get_editor(editor_id)
+
def test_editor_lookup(api):
editor_id = api.editor_id
diff --git a/python/tests/api_entity_editing.py b/python/tests/api_entity_editing.py
index 5f22e9f2..4e5b07d7 100644
--- a/python/tests/api_entity_editing.py
+++ b/python/tests/api_entity_editing.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client import *
from fixtures import *
@@ -53,7 +52,7 @@ def test_edit_after_accept(api):
except fatcat_openapi_client.rest.ApiException as e:
assert 400 <= e.status < 500
# TODO: need better message
- #assert "accepted" in e.body
+ # assert "accepted" in e.body
# cleanup
eg = quick_eg(api)
@@ -142,8 +141,8 @@ def test_wip_revision(api):
def test_edit_get_all(api_dummy_entities):
ade = api_dummy_entities
- api = ade['api']
- eg = api.get_editgroup(ade['editgroup'].editgroup_id)
+ api = ade["api"]
+ eg = api.get_editgroup(ade["editgroup"].editgroup_id)
assert api.get_creator_edit(eg.edits.creators[0].edit_id)
assert api.get_container_edit(eg.edits.containers[0].edit_id)
@@ -153,11 +152,12 @@ def test_edit_get_all(api_dummy_entities):
assert api.get_release_edit(eg.edits.releases[0].edit_id)
assert api.get_work_edit(eg.edits.works[0].edit_id)
+
def test_edit_delete_all(api_dummy_entities):
ade = api_dummy_entities
- api = ade['api']
- eg = api.get_editgroup(ade['editgroup'].editgroup_id)
+ api = ade["api"]
+ eg = api.get_editgroup(ade["editgroup"].editgroup_id)
api.delete_creator_edit(eg.editgroup_id, eg.edits.creators[0].edit_id)
api.delete_container_edit(eg.editgroup_id, eg.edits.containers[0].edit_id)
@@ -168,7 +168,7 @@ def test_edit_delete_all(api_dummy_entities):
api.delete_work_edit(eg.editgroup_id, eg.edits.works[0].edit_id)
# verify deletion
- eg = api.get_editgroup(ade['editgroup'].editgroup_id)
+ eg = api.get_editgroup(ade["editgroup"].editgroup_id)
assert len(eg.edits.creators) == 0
assert len(eg.edits.containers) == 0
assert len(eg.edits.files) == 0
diff --git a/python/tests/api_entity_state.py b/python/tests/api_entity_state.py
index 58e1d42b..954ea29a 100644
--- a/python/tests/api_entity_state.py
+++ b/python/tests/api_entity_state.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client import *
from fatcat_openapi_client.rest import ApiException
@@ -17,8 +16,8 @@ def test_redirect_entity(api):
offset = 0
while True:
offset += 1
- o1 = '0000-0000-1111-%04d' % offset
- o2 = '0000-0000-2222-%04d' % offset
+ o1 = "0000-0000-1111-%04d" % offset
+ o2 = "0000-0000-2222-%04d" % offset
try:
api.lookup_creator(orcid=o1)
continue
@@ -55,7 +54,9 @@ def test_redirect_entity(api):
assert redirs == []
api.accept_editgroup(eg.editgroup_id)
redirs = api.get_creator_redirects(c1.ident)
- assert redirs == [c2.ident, ]
+ assert redirs == [
+ c2.ident,
+ ]
# get both by ident
res = api.get_creator(c1.ident)
@@ -95,7 +96,9 @@ def test_redirect_entity(api):
assert res.display_name is None
assert res.revision is None
redirs = api.get_creator_redirects(c1.ident)
- assert redirs == [c2.ident, ]
+ assert redirs == [
+ c2.ident,
+ ]
# undelete first; check that second is a redirect
eg = quick_eg(api)
@@ -106,7 +109,9 @@ def test_redirect_entity(api):
assert res.state == "redirect"
assert res.display_name == "test one one"
redirs = api.get_creator_redirects(c1.ident)
- assert redirs == [c2.ident, ]
+ assert redirs == [
+ c2.ident,
+ ]
# split second entity back out
assert c2.revision
@@ -116,7 +121,9 @@ def test_redirect_entity(api):
# prev_revision should be none after an un-redirect
assert update_edit.prev_revision is None
redirs = api.get_creator_redirects(c1.ident)
- assert redirs == [c2.ident, ]
+ assert redirs == [
+ c2.ident,
+ ]
api.accept_editgroup(eg.editgroup_id)
res = api.get_creator(c2.ident)
assert res.state == "active"
@@ -138,7 +145,7 @@ def test_delete_entity(api):
offset = 0
while True:
offset += 1
- o1 = '0000-0000-1111-%04d' % offset
+ o1 = "0000-0000-1111-%04d" % offset
try:
api.lookup_creator(orcid=o1)
continue
@@ -177,10 +184,10 @@ def test_delete_entity(api):
try:
# can't re-delete an entity
api.delete_creator(eg.editgroup_id, c1.ident)
- #api.accept_editgroup(eg.editgroup_id)
+ # api.accept_editgroup(eg.editgroup_id)
assert False
except fatcat_openapi_client.rest.ApiException as e:
- assert 400 <= e.status < 500 # error is 4xx
+ assert 400 <= e.status < 500 # error is 4xx
# undelete
eg = quick_eg(api)
@@ -204,9 +211,9 @@ def test_recursive_redirects_entity(api):
offset = 0
while True:
offset += 1
- o1 = '0000-0000-1111-%04d' % offset
- o2 = '0000-0000-2222-%04d' % offset
- o3 = '0000-0000-3333-%04d' % offset
+ o1 = "0000-0000-1111-%04d" % offset
+ o2 = "0000-0000-2222-%04d" % offset
+ o3 = "0000-0000-3333-%04d" % offset
try:
api.lookup_creator(orcid=o1)
continue
@@ -449,6 +456,7 @@ def test_required_entity_fields(api):
assert 400 <= e.status < 500
assert "title" in e.body
+
def test_revert_current_status(api):
c1 = CreatorEntity(display_name="test updates")
diff --git a/python/tests/api_files.py b/python/tests/api_files.py
index 65eda993..443fe63f 100644
--- a/python/tests/api_files.py
+++ b/python/tests/api_files.py
@@ -1,4 +1,3 @@
-
from fatcat_openapi_client import *
from fixtures import *
@@ -15,7 +14,10 @@ def test_file(api):
sha256="f1f4f18a904e76818863ccbc6141fce92b0dcb47b0d6041aec98bc6806e393c3",
mimetype="application/pdf",
urls=[
- FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/something.com/blah.pdf",
+ rel="webarchive",
+ ),
],
release_ids=[],
extra=dict(a=2, b=5),
@@ -53,12 +55,15 @@ def test_file(api):
f2 = api.get_file(f2.ident)
assert f2.state == "deleted"
+
def test_file_examples(api):
- f1 = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362')
+ f1 = api.lookup_file(
+ sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362"
+ )
assert f1.releases is None
- f1 = api.get_file('aaaaaaaaaaaaamztaaaaaaaaam', expand="releases")
+ f1 = api.get_file("aaaaaaaaaaaaamztaaaaaaaaam", expand="releases")
assert f1.sha256 == "ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362"
assert f1.releases[0].ident
assert f1.releases[0].abstracts is None
diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py
index f351dc9f..1ec0df17 100644
--- a/python/tests/api_filesets.py
+++ b/python/tests/api_filesets.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client import *
from fixtures import *
@@ -11,7 +10,7 @@ def test_fileset(api):
r1edit = api.create_release(eg.editgroup_id, r1)
fs1 = FilesetEntity(
- manifest = [
+ manifest=[
FilesetFile(
path="data/thing.tar.gz",
size=54321,
@@ -30,11 +29,11 @@ def test_fileset(api):
extra={"x": 1, "y": "q"},
),
],
- urls = [
+ urls=[
FilesetUrl(url="https://archive.org/download/fileset-123/", rel="repository"),
FilesetUrl(url="https://humble-host.com/~user123/dataset/", rel="web"),
],
- release_ids = [r1edit.ident],
+ release_ids=[r1edit.ident],
extra=dict(t=4, u=9),
edit_extra=dict(test_key="filesets rule"),
)
@@ -59,7 +58,9 @@ def test_fileset(api):
# expansion
r1 = api.get_release(r1edit.ident, expand="filesets")
# XXX: manifest return order is *NOT* currently stable
- assert (r1.filesets[0].manifest == fs1.manifest) or (r1.filesets[0].manifest == list(reversed(fs1.manifest)))
+ assert (r1.filesets[0].manifest == fs1.manifest) or (
+ r1.filesets[0].manifest == list(reversed(fs1.manifest))
+ )
# get redirects (none)
assert api.get_fileset_redirects(fs2.ident) == []
@@ -71,28 +72,30 @@ def test_fileset(api):
fs2 = api.get_fileset(fs2.ident)
assert fs2.state == "deleted"
+
def test_fileset_examples(api):
- fs3 = api.get_fileset('aaaaaaaaaaaaaztgaaaaaaaaam')
+ fs3 = api.get_fileset("aaaaaaaaaaaaaztgaaaaaaaaam")
assert fs3.releases is None
- fs3 = api.get_fileset('aaaaaaaaaaaaaztgaaaaaaaaam', expand="releases")
+ fs3 = api.get_fileset("aaaaaaaaaaaaaztgaaaaaaaaam", expand="releases")
- assert fs3.urls[0].url == 'http://other-personal-blog.name/dataset/'
- assert fs3.urls[1].rel == 'archive'
- assert fs3.manifest[1].md5 == 'f4de91152c7ab9fdc2a128f962faebff'
- assert fs3.manifest[1].mimetype == 'application/gzip'
+ assert fs3.urls[0].url == "http://other-personal-blog.name/dataset/"
+ assert fs3.urls[1].rel == "archive"
+ assert fs3.manifest[1].md5 == "f4de91152c7ab9fdc2a128f962faebff"
+ assert fs3.manifest[1].mimetype == "application/gzip"
assert fs3.manifest[1].extra is not None
assert fs3.releases[0].ident
assert fs3.releases[0].abstracts is None
assert fs3.releases[0].refs is None
+
def test_bad_fileset(api):
eg = quick_eg(api)
bad_list = [
# good (for testing test itself)
- #FilesetEntity(manifest=[FilesetFile(path="123.jpg", size=1234)]),
- #FilesetEntity(urls=[FilesetUrl(url="thing", rel="blah")]),
+ # FilesetEntity(manifest=[FilesetFile(path="123.jpg", size=1234)]),
+ # FilesetEntity(urls=[FilesetUrl(url="thing", rel="blah")]),
FilesetEntity(manifest=[FilesetFile(path="123.jpg", size="big")]),
FilesetEntity(release_ids=["asdf"]),
]
diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py
index 4c9ac9a6..7bb7023b 100644
--- a/python/tests/api_misc.py
+++ b/python/tests/api_misc.py
@@ -1,35 +1,42 @@
-
from fatcat_openapi_client import *
from fixtures import *
def test_lookup_hide_extend(api):
- r = api.lookup_release(doi='10.1371/journal.pmed.0020124')
+ r = api.lookup_release(doi="10.1371/journal.pmed.0020124")
assert len(r.refs) >= 2
assert r.files is None
assert r.container is None
assert len(r.container_id) > 10
assert r.abstracts == []
- r = api.lookup_release(doi='10.1371/journal.pmed.0020124', expand='files', hide='refs,abstracts')
+ r = api.lookup_release(
+ doi="10.1371/journal.pmed.0020124", expand="files", hide="refs,abstracts"
+ )
assert r.refs is None
assert len(r.files[0].sha1) == 40
assert r.container is None
assert r.abstracts is None
- r = api.lookup_release(doi='10.1371/journal.pmed.0020124', expand='container,abstracts')
+ r = api.lookup_release(doi="10.1371/journal.pmed.0020124", expand="container,abstracts")
assert len(r.refs) >= 2
assert r.files is None
assert r.container.issnl
assert r.abstracts == []
- f = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362')
+ f = api.lookup_file(
+ sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362"
+ )
assert f.releases is None
- f = api.lookup_file(sha256='ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362', expand='releases')
+ f = api.lookup_file(
+ sha256="ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362",
+ expand="releases",
+ )
assert f.releases[0].ident == f.release_ids[0]
+
def test_unexpected_body(api):
eg = quick_eg(api)
diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py
index a823d4cd..39c09270 100644
--- a/python/tests/api_releases.py
+++ b/python/tests/api_releases.py
@@ -1,4 +1,3 @@
-
import datetime
import pytest
@@ -44,26 +43,16 @@ def test_release(api):
language="en",
license_slug="CC-0",
contribs=[
- ReleaseContrib(
- given_name="Paul",
- surname="Otlet"),
- ReleaseContrib(
- raw_name="Cindy Sherman",
- given_name="Cindy",
- surname="Sherman"),
- ReleaseContrib(
- raw_name="Andy Warhol"),
+ ReleaseContrib(given_name="Paul", surname="Otlet"),
+ ReleaseContrib(raw_name="Cindy Sherman", given_name="Cindy", surname="Sherman"),
+ ReleaseContrib(raw_name="Andy Warhol"),
],
refs=[],
abstracts=[
+ ReleaseAbstract(content="this is some abstract", mimetype="text/plain", lang="en"),
ReleaseAbstract(
- content="this is some abstract",
- mimetype="text/plain",
- lang="en"),
- ReleaseAbstract(
- content="this is some other abstract",
- mimetype="text/plain",
- lang="de"),
+ content="this is some other abstract", mimetype="text/plain", lang="de"
+ ),
],
extra=dict(a=1, b=2),
edit_extra=dict(test_key="releases rule"),
@@ -138,33 +127,34 @@ def test_release(api):
r2 = api.get_release(r2.ident)
assert r2.state == "deleted"
+
def test_release_examples(api):
- api.lookup_release(pmid='54321')
- api.lookup_release(doi='10.123/abc')
- api.lookup_release(isbn13='978-3-16-148410-0')
- api.lookup_release(arxiv='1905.03769v1')
- api.lookup_release(jstor='1819117828')
- api.lookup_release(ark='ark:/13030/m53r5pzm')
- api.lookup_release(mag='992489213')
- api.lookup_release(hdl='20.500.23456/ABC/DUMMY')
- api.lookup_release(hdl='20.500.23456/abc/dummy')
+ api.lookup_release(pmid="54321")
+ api.lookup_release(doi="10.123/abc")
+ api.lookup_release(isbn13="978-3-16-148410-0")
+ api.lookup_release(arxiv="1905.03769v1")
+ api.lookup_release(jstor="1819117828")
+ api.lookup_release(ark="ark:/13030/m53r5pzm")
+ api.lookup_release(mag="992489213")
+ api.lookup_release(hdl="20.500.23456/ABC/DUMMY")
+ api.lookup_release(hdl="20.500.23456/abc/dummy")
# failed lookup exception type
try:
- api.lookup_release(pmid='5432100')
+ api.lookup_release(pmid="5432100")
except fatcat_openapi_client.rest.ApiException as ae:
assert ae.status == 404
assert "DatabaseRowNotFound" in ae.body
# failed lookup formatting
try:
- api.lookup_release(doi='blah')
+ api.lookup_release(doi="blah")
except fatcat_openapi_client.rest.ApiException as ae:
assert ae.status == 400
assert "MalformedExternalId" in ae.body
- r1 = api.get_release('aaaaaaaaaaaaarceaaaaaaaaai')
+ r1 = api.get_release("aaaaaaaaaaaaarceaaaaaaaaai")
assert r1.title.startswith("A bigger example")
assert len(r1.refs) == 5
assert r1.contribs[14].role == "editor"
@@ -174,6 +164,7 @@ def test_release_examples(api):
api.get_release_filesets(r1.ident)
api.get_release_webcaptures(r1.ident)
+
def test_empty_fields(api):
eg = quick_eg(api)
@@ -181,33 +172,43 @@ def test_empty_fields(api):
r1 = ReleaseEntity(
title="something",
contribs=[ReleaseContrib(raw_name="somebody")],
- ext_ids=ReleaseExtIds())
+ ext_ids=ReleaseExtIds(),
+ )
api.create_release(eg.editgroup_id, r1)
with pytest.raises(fatcat_openapi_client.rest.ApiException):
r2 = ReleaseEntity(title="", ext_ids=ReleaseExtIds())
api.create_release(eg.editgroup_id, r2)
with pytest.raises(fatcat_openapi_client.rest.ApiException):
- r2 = ReleaseEntity(title="something", contribs=[ReleaseContrib(raw_name="")], ext_ids=ReleaseExtIds())
+ r2 = ReleaseEntity(
+ title="something", contribs=[ReleaseContrib(raw_name="")], ext_ids=ReleaseExtIds()
+ )
api.create_release(eg.editgroup_id, r2)
+
def test_controlled_vocab(api):
eg = quick_eg(api)
- r1 = ReleaseEntity(title="something", release_type="journal-thingie", ext_ids=ReleaseExtIds())
+ r1 = ReleaseEntity(
+ title="something", release_type="journal-thingie", ext_ids=ReleaseExtIds()
+ )
with pytest.raises(fatcat_openapi_client.rest.ApiException):
api.create_release(eg.editgroup_id, r1)
r1.release_type = "article"
api.create_release(eg.editgroup_id, r1)
- r2 = ReleaseEntity(title="something else", release_stage="pre-print", ext_ids=ReleaseExtIds())
+ r2 = ReleaseEntity(
+ title="something else", release_stage="pre-print", ext_ids=ReleaseExtIds()
+ )
with pytest.raises(fatcat_openapi_client.rest.ApiException):
api.create_release(eg.editgroup_id, r2)
r2.release_stage = "published"
api.create_release(eg.editgroup_id, r2)
- r3 = ReleaseEntity(title="something else", withdrawn_status="boondogle", ext_ids=ReleaseExtIds())
+ r3 = ReleaseEntity(
+ title="something else", withdrawn_status="boondogle", ext_ids=ReleaseExtIds()
+ )
with pytest.raises(fatcat_openapi_client.rest.ApiException):
api.create_release(eg.editgroup_id, r3)
r3.withdrawn_status = "spam"
diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py
index 510d398c..36bf40e9 100644
--- a/python/tests/api_webcaptures.py
+++ b/python/tests/api_webcaptures.py
@@ -1,4 +1,3 @@
-
import datetime
import pytest
@@ -13,13 +12,13 @@ def test_webcapture(api):
r1edit = api.create_release(eg.editgroup_id, r1)
wc1 = WebcaptureEntity(
- original_url = "http://example.site",
- #timestamp = "2012-01-02T03:04:05Z",
- timestamp = datetime.datetime.now(datetime.timezone.utc),
- cdx = [
+ original_url="http://example.site",
+ # timestamp = "2012-01-02T03:04:05Z",
+ timestamp=datetime.datetime.now(datetime.timezone.utc),
+ cdx=[
WebcaptureCdxLine(
surt="site,example,)/data/thing.tar.gz",
- #timestamp="2012-01-02T03:04:05Z",
+ # timestamp="2012-01-02T03:04:05Z",
timestamp=datetime.datetime.now(datetime.timezone.utc),
url="http://example.site/data/thing.tar.gz",
mimetype="application/gzip",
@@ -30,7 +29,7 @@ def test_webcapture(api):
),
WebcaptureCdxLine(
surt="site,example,)/README.md",
- #timestamp="2012-01-02T03:04:05Z",
+ # timestamp="2012-01-02T03:04:05Z",
timestamp=datetime.datetime.now(datetime.timezone.utc),
url="http://example.site/README.md",
mimetype="text/markdown",
@@ -40,10 +39,10 @@ def test_webcapture(api):
sha256="429bcafa4d3d0072d5b2511e12c85c1aac1d304011d1c406da14707f7b9cd905",
),
],
- archive_urls = [
+ archive_urls=[
FileUrl(rel="wayback", url="https://web.archive.org/web/"),
],
- release_ids = [r1edit.ident],
+ release_ids=[r1edit.ident],
extra=dict(c=1, b=2),
edit_extra=dict(test_key="webcaptures rule"),
)
@@ -61,9 +60,9 @@ def test_webcapture(api):
# check that fields match
# I don't know why these aren't equal...
- #print(wc1.archive_urls)
- #print(wc2.archive_urls)
- #assert wc1.archive_urls == wc2.archive_urls
+ # print(wc1.archive_urls)
+ # print(wc2.archive_urls)
+ # assert wc1.archive_urls == wc2.archive_urls
assert wc1.archive_urls[0].rel == wc2.archive_urls[0].rel
assert wc1.archive_urls[0].url == wc2.archive_urls[0].url
assert wc1.cdx[0] == wc2.cdx[0]
@@ -89,14 +88,15 @@ def test_webcapture(api):
wc2 = api.get_webcapture(wc2.ident)
assert wc2.state == "deleted"
+
def test_webcapture_examples(api):
- wc3 = api.get_webcapture('aaaaaaaaaaaaa53xaaaaaaaaam')
+ wc3 = api.get_webcapture("aaaaaaaaaaaaa53xaaaaaaaaam")
assert wc3.releases is None
- wc3 = api.get_webcapture('aaaaaaaaaaaaa53xaaaaaaaaam', expand="releases")
+ wc3 = api.get_webcapture("aaaaaaaaaaaaa53xaaaaaaaaam", expand="releases")
- assert wc3.cdx[0].surt == 'org,asheesh)/'
- assert wc3.cdx[1].sha1 == 'a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5'
- assert wc3.archive_urls[1].rel == 'warc'
+ assert wc3.cdx[0].surt == "org,asheesh)/"
+ assert wc3.cdx[1].sha1 == "a637f1d27d9bcb237310ed29f19c07e1c8cf0aa5"
+ assert wc3.archive_urls[1].rel == "warc"
assert wc3.releases[0].ident
assert wc3.releases[0].abstracts is None
assert wc3.releases[0].refs is None
@@ -110,44 +110,60 @@ def test_bad_webcapture(api):
good = WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- surt="site,example,)/123.jpg",
- url="http://example.site/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- timestamp="2012-01-02T03:04:05Z")])
+ cdx=[
+ WebcaptureCdxLine(
+ surt="site,example,)/123.jpg",
+ url="http://example.site/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ timestamp="2012-01-02T03:04:05Z",
+ )
+ ],
+ )
bad_list = [
# uncomment to "test the test"
- #good,
+ # good,
# CDX timestamp format
WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- surt="site,example,)/123.jpg",
- url="http://example.site/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- size=123,
- timestamp="20120102030405")]),
+ cdx=[
+ WebcaptureCdxLine(
+ surt="site,example,)/123.jpg",
+ url="http://example.site/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ size=123,
+ timestamp="20120102030405",
+ )
+ ],
+ ),
# CDX timestamp format (int)
WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- surt="site,example,)/123.jpg",
- url="http://example.site/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- timestamp=20120102030405)]),
+ cdx=[
+ WebcaptureCdxLine(
+ surt="site,example,)/123.jpg",
+ url="http://example.site/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ timestamp=20120102030405,
+ )
+ ],
+ ),
# negative size
WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- surt="site,example,)/123.jpg",
- url="http://example.site/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- size=-123,
- timestamp="20120102030405")]),
+ cdx=[
+ WebcaptureCdxLine(
+ surt="site,example,)/123.jpg",
+ url="http://example.site/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ size=-123,
+ timestamp="20120102030405",
+ )
+ ],
+ ),
]
api.create_webcapture(eg.editgroup_id, good)
@@ -160,21 +176,27 @@ def test_bad_webcapture(api):
WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- #url="http://example.site/123.jpg",
- surt="site,example,)/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- timestamp="2012-01-02T03:04:05Z",
- )])
+ cdx=[
+ WebcaptureCdxLine(
+ # url="http://example.site/123.jpg",
+ surt="site,example,)/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ timestamp="2012-01-02T03:04:05Z",
+ )
+ ],
+ )
with pytest.raises(ValueError):
# missing/empty CDX timestamp
WebcaptureEntity(
original_url="http://example.site/123.jpg",
timestamp="2012-01-02T03:04:05Z",
- cdx=[WebcaptureCdxLine(
- url="http://example.site/123.jpg",
- surt="site,example,)/123.jpg",
- sha1="455face3598611458efe1f072e58624790a67266",
- #timestamp="2012-01-02T03:04:05Z",
- )])
+ cdx=[
+ WebcaptureCdxLine(
+ url="http://example.site/123.jpg",
+ surt="site,example,)/123.jpg",
+ sha1="455face3598611458efe1f072e58624790a67266",
+ # timestamp="2012-01-02T03:04:05Z",
+ )
+ ],
+ )
diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py
index 1d57dfeb..2aeb28ea 100644
--- a/python/tests/citation_efficiency.py
+++ b/python/tests/citation_efficiency.py
@@ -1,4 +1,3 @@
-
from fatcat_openapi_client import *
from fixtures import *
@@ -34,6 +33,7 @@ def test_citation_indexing(api):
assert r1.refs[1].index == 1
assert r1.refs[1].key == "third"
+
def test_citation_targets(api):
# invariant to linking citations
# also, updates work
@@ -56,12 +56,13 @@ def test_citation_targets(api):
r2 = api.get_release(r2.ident)
assert r2.refs[0].key == "first"
assert r2.refs[1].key == "second"
- assert r2.refs[0].index == 0 # TODO: one-indexing?
+ assert r2.refs[0].index == 0 # TODO: one-indexing?
assert r2.refs[1].index == 1
assert r2.refs[0].target_release_id is None
assert r2.refs[1].target_release_id == r1.ident
assert len(r2.refs) == 2
+
def test_citation_empty_array(api):
# distinction between empty array (no citations) and no array (hidden)
@@ -83,17 +84,24 @@ def test_citation_empty_array(api):
r1b = api.get_release(r1.ident, hide="refs")
assert r1b.refs is None
+
def test_citation_encoding(api):
# escape-only changes (eg, \u1234 whatever for ASCII)
r1 = ReleaseEntity(title="citation encoding", ext_ids=ReleaseExtIds())
- title = "title-unicode \\u0050 \\\" "
+ title = 'title-unicode \\u0050 \\" '
container = "container-unicode ☃︎ ä ö ü スティー"
- extra = extra={'a': 1, 'b': 2, 'ö': 3}
+ extra = extra = {"a": 1, "b": 2, "ö": 3}
locator = "p123"
r1.refs = [
- ReleaseRef(key="1", year=1923, title=title, container_name=container,
- extra=extra, locator=locator),
+ ReleaseRef(
+ key="1",
+ year=1923,
+ title=title,
+ container_name=container,
+ extra=extra,
+ locator=locator,
+ ),
ReleaseRef(key="2"),
]
diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py
index c8b935bb..34a00ebb 100644
--- a/python/tests/clean_files.py
+++ b/python/tests/clean_files.py
@@ -1,4 +1,3 @@
-
import copy
import pytest
@@ -12,6 +11,7 @@ from fatcat_tools.cleanups import FileCleaner
def file_cleaner(api):
yield FileCleaner(api)
+
def test_url_cleanups(file_cleaner):
f = FileEntity(
@@ -20,41 +20,62 @@ def test_url_cleanups(file_cleaner):
)
f.urls = [
- FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
- FileUrl(url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"),
- FileUrl(url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"
+ ),
+ FileUrl(
+ url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"
+ ),
+ FileUrl(
+ url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"
+ ),
]
f = file_cleaner.clean_entity(f)
# remove None wayback links
assert len(f.urls) == 2
for u in f.urls:
- assert 'web/None' not in u.url
+ assert "web/None" not in u.url
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))
# rel=repository -> rel=archive for archive.org links
- assert f.urls[1].rel == 'archive'
+ assert f.urls[1].rel == "archive"
# short wayback dates
f.urls = [
- FileUrl(url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
- FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+ FileUrl(
+ url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
+ FileUrl(
+ url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
]
f = file_cleaner.clean_entity(f)
assert len(f.urls) == 1
- assert f.urls[0].url == 'http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+ assert (
+ f.urls[0].url
+ == "http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+ )
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))
f.urls = [
- FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+ FileUrl(
+ url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
]
f = file_cleaner.clean_entity(f)
assert len(f.urls) == 1
- assert f.urls[0].url == 'http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+ assert (
+ f.urls[0].url
+ == "http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+ )
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))
diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py
index a9b81bda..0950087f 100644
--- a/python/tests/fixtures.py
+++ b/python/tests/fixtures.py
@@ -1,4 +1,3 @@
-
import elasticsearch
import fatcat_openapi_client
import pytest
@@ -9,73 +8,77 @@ import fatcat_web
from fatcat_tools import authenticated_api
ES_CONTAINER_STATS_RESP = {
- 'timed_out': False,
- 'aggregations': {
- 'container_stats': {'buckets': {
- 'is_preserved': {'doc_count': 461939},
- 'in_kbart': {'doc_count': 461939},
- 'in_web': {'doc_count': 2797},
- }},
- 'preservation': {
- 'buckets': [
- {'key': 'bright', 'doc_count': 444},
- {'key': 'dark', 'doc_count': 111},
+ "timed_out": False,
+ "aggregations": {
+ "container_stats": {
+ "buckets": {
+ "is_preserved": {"doc_count": 461939},
+ "in_kbart": {"doc_count": 461939},
+ "in_web": {"doc_count": 2797},
+ }
+ },
+ "preservation": {
+ "buckets": [
+ {"key": "bright", "doc_count": 444},
+ {"key": "dark", "doc_count": 111},
],
- 'sum_other_doc_count': 0,
+ "sum_other_doc_count": 0,
},
- 'release_type': {
- 'buckets': [
- {'key': 'article-journal', 'doc_count': 456},
- {'key': 'book', 'doc_count': 123},
+ "release_type": {
+ "buckets": [
+ {"key": "article-journal", "doc_count": 456},
+ {"key": "book", "doc_count": 123},
],
- 'sum_other_doc_count': 0,
+ "sum_other_doc_count": 0,
},
},
- 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 50
+ "hits": {"total": 461939, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 50,
}
# TODO: this should not be empty
ES_CONTAINER_RANDOM_RESP = {
- 'timed_out': False,
- 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 50
+ "timed_out": False,
+ "hits": {"total": 461939, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 50,
}
ES_RELEASE_EMPTY_RESP = {
- 'timed_out': False,
- 'hits': {'total': 0, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 50
+ "timed_out": False,
+ "hits": {"total": 0, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 50,
}
+
@pytest.fixture
def full_app(mocker):
load_dotenv(dotenv_path="./example.env")
fatcat_web.app.testing = True
fatcat_web.app.debug = False
- fatcat_web.app.config['WTF_CSRF_ENABLED'] = False
+ fatcat_web.app.config["WTF_CSRF_ENABLED"] = False
# mock out ES client requests, so they at least fail fast
fatcat_web.app.es_client = elasticsearch.Elasticsearch("mockbackend")
- mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
return fatcat_web.app
+
@pytest.fixture
def app(full_app):
return full_app.test_client()
+
@pytest.fixture
def app_admin(app):
ADMIN_DEV_TOKEN = "AgEPZGV2LmZhdGNhdC53aWtpAhYyMDE5MDEwMS1kZXYtZHVtbXkta2V5AAImZWRpdG9yX2lkID0gYWFhYWFhYWFhYWFhYmt2a2FhYWFhYWFhYWkAAht0aW1lID4gMjAxOS0wNC0wNFQyMzozMjo0NloAAAYgrN3jjy0mgEqIydTFfsOLYSS55dz6Fh2d1CGMNQFLwcQ="
- rv = app.post('/auth/token_login',
- data=dict(token=ADMIN_DEV_TOKEN),
- follow_redirects=True)
+ rv = app.post("/auth/token_login", data=dict(token=ADMIN_DEV_TOKEN), follow_redirects=True)
assert rv.status_code == 200
return app
+
@pytest.fixture
def api():
load_dotenv(dotenv_path="./example.env")
@@ -83,6 +86,7 @@ def api():
api_client.editor_id = "aaaaaaaaaaaabkvkaaaaaaaaae"
return api_client
+
@pytest.fixture
def api_dummy_entities(api):
"""
@@ -121,13 +125,16 @@ def api_dummy_entities(api):
"work": w1,
}
+
def test_get_changelog_entry(api):
"""Check that fixture is working"""
cl = api.get_changelog_entry(1)
assert cl
+
## Helpers ##################################################################
+
def quick_eg(api_inst):
eg = api_inst.create_editgroup(fatcat_openapi_client.Editgroup())
return eg
diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py
index fb43e3f6..12ad34fe 100644
--- a/python/tests/harvest_crossref.py
+++ b/python/tests/harvest_crossref.py
@@ -1,4 +1,3 @@
-
import datetime
import json
@@ -12,13 +11,14 @@ def test_crossref_harvest_date(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
# mock day request to crossref API
- with open('tests/files/crossref_api_works.json', 'r') as f:
+ with open("tests/files/crossref_api_works.json", "r") as f:
crossref_resp = json.loads(f.readline())
- responses.add(responses.GET, 'https://api.crossref.org/works',
- json=crossref_resp, status=200)
+ responses.add(
+ responses.GET, "https://api.crossref.org/works", json=crossref_resp, status=200
+ )
harvester = HarvestCrossrefWorker(
kafka_hosts="dummy",
@@ -34,7 +34,7 @@ def test_crossref_harvest_date(mocker):
assert len(responses.calls) == 1
# ensure email was included in User-Agent
- assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent']
+ assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers["User-Agent"]
# check that correct date param was passed as expected
assert "filter=from-update-date%3A2019-02-03" in responses.calls[0].request.url
diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py
index 0a7584bd..40e5f711 100644
--- a/python/tests/harvest_datacite.py
+++ b/python/tests/harvest_datacite.py
@@ -1,4 +1,3 @@
-
import datetime
import json
@@ -12,13 +11,12 @@ def test_datacite_harvest_date(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
# mock day request to datacite API
- with open('tests/files/datacite_api.json', 'r') as f:
+ with open("tests/files/datacite_api.json", "r") as f:
resp = json.loads(f.readline())
- responses.add(responses.GET, 'https://api.datacite.org/dois',
- json=resp, status=200)
+ responses.add(responses.GET, "https://api.datacite.org/dois", json=resp, status=200)
harvester = HarvestDataciteWorker(
kafka_hosts="dummy",
@@ -34,10 +32,13 @@ def test_datacite_harvest_date(mocker):
assert len(responses.calls) == 1
# ensure email was included in User-Agent
- assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers['User-Agent']
+ assert "mailto:test@fatcat.wiki" in responses.calls[0].request.headers["User-Agent"]
# check that correct date param was passed as expected
- assert "query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D" in responses.calls[0].request.url
+ assert (
+ "query=updated%3A%5B2019-02-03T00%3A00%3A00.000Z+TO+2019-02-03T23%3A59%3A59.999Z%5D"
+ in responses.calls[0].request.url
+ )
# check that we published the expected number of DOI objects were published
# to the (mock) kafka topic
diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py
index aba766ba..422870f2 100644
--- a/python/tests/harvest_pubmed.py
+++ b/python/tests/harvest_pubmed.py
@@ -14,20 +14,20 @@ def test_pubmed_harvest_date(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
# Mocking a file fetched from FTP, should contain some 'PubmedArticle' elements.
# $ zcat tests/files/pubmedsample_2019.xml.gz | grep -c '<PubmedArticle>'
# 176
- file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_2019.xml.gz')
- ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr')
+ file_to_retrieve = os.path.join(os.path.dirname(__file__), "files/pubmedsample_2019.xml.gz")
+ ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr")
ftpretr.return_value = file_to_retrieve
- test_date = '2020-02-20'
+ test_date = "2020-02-20"
# We'll need one entry in the date_file_map.
- generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map')
- generate_date_file_map.return_value = {test_date: set(['dummy'])}
+ generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map")
+ generate_date_file_map.return_value = {test_date: set(["dummy"])}
# For cleanup.
os.remove = mocker.Mock()
@@ -41,7 +41,7 @@ def test_pubmed_harvest_date(mocker):
harvester.producer = mocker.Mock()
harvester.date_file_map = generate_date_file_map()
# Since we mock out the FTP fetch, the concrete date does not matter here.
- harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d'))
+ harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d"))
# check that we published the expected number of DOI objects were published
# to the (mock) kafka topic
@@ -49,20 +49,23 @@ def test_pubmed_harvest_date(mocker):
assert harvester.producer.flush.call_count == 1
assert os.remove.call_count == 2
+
def test_pubmed_harvest_date_no_pmid(mocker):
# mock out the harvest state object so it doesn't try to actually connect
# to Kafka
- mocker.patch('fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka')
+ mocker.patch("fatcat_tools.harvest.harvest_common.HarvestState.initialize_from_kafka")
- file_to_retrieve = os.path.join(os.path.dirname(__file__), 'files/pubmedsample_no_pmid_2019.xml.gz')
- ftpretr = mocker.patch('fatcat_tools.harvest.pubmed.ftpretr')
+ file_to_retrieve = os.path.join(
+ os.path.dirname(__file__), "files/pubmedsample_no_pmid_2019.xml.gz"
+ )
+ ftpretr = mocker.patch("fatcat_tools.harvest.pubmed.ftpretr")
ftpretr.return_value = file_to_retrieve
- test_date = '2020-02-20'
+ test_date = "2020-02-20"
# We'll need one entry in the date_file_map.
- generate_date_file_map = mocker.patch('fatcat_tools.harvest.pubmed.generate_date_file_map')
- generate_date_file_map.return_value = {test_date: set(['dummy'])}
+ generate_date_file_map = mocker.patch("fatcat_tools.harvest.pubmed.generate_date_file_map")
+ generate_date_file_map.return_value = {test_date: set(["dummy"])}
harvester = PubmedFTPWorker(
kafka_hosts="dummy",
@@ -74,4 +77,4 @@ def test_pubmed_harvest_date_no_pmid(mocker):
# The file has not PMID, not importable.
with pytest.raises(ValueError):
- harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d'))
+ harvester.fetch_date(datetime.datetime.strptime(test_date, "%Y-%m-%d"))
diff --git a/python/tests/harvest_state.py b/python/tests/harvest_state.py
index ecd276ed..039794b9 100644
--- a/python/tests/harvest_state.py
+++ b/python/tests/harvest_state.py
@@ -1,4 +1,3 @@
-
import datetime
from fatcat_tools.harvest import *
@@ -18,21 +17,21 @@ def test_harvest_state():
assert hs.next_span() is None
hs = HarvestState(
- start_date=datetime.date(2000,1,1),
- end_date=datetime.date(2000,1,3),
+ start_date=datetime.date(2000, 1, 1),
+ end_date=datetime.date(2000, 1, 3),
)
assert len(hs.to_process) == 3
hs = HarvestState(
- start_date=datetime.date(2000,1,29),
- end_date=datetime.date(2000,2,2),
+ start_date=datetime.date(2000, 1, 29),
+ end_date=datetime.date(2000, 2, 2),
)
assert len(hs.to_process) == 5
hs = HarvestState(catchup_days=0)
assert hs.next_span() is None
hs.enqueue_period(
- start_date=datetime.date(2000,1,1),
- end_date=datetime.date(2000,1,3),
+ start_date=datetime.date(2000, 1, 1),
+ end_date=datetime.date(2000, 1, 3),
)
assert len(hs.to_process) == 3
hs.update('{"completed-date": "2000-01-02"}')
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 6f73d817..90e6d05b 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -11,41 +10,51 @@ from fatcat_tools.importers import ArabesqueMatchImporter, JsonLinePusher, Sqlit
def arabesque_importer(api):
yield ArabesqueMatchImporter(api, extid_type="doi", crawl_id="DUMMY123")
+
# TODO: use API to check that entities actually created...
def test_arabesque_importer_basic(arabesque_importer):
- SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
+ SqlitePusher(
+ arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+ ).run()
+
def test_arabesque_importer_json(arabesque_importer):
- with open('tests/files/arabesque_example.json', 'r') as f:
+ with open("tests/files/arabesque_example.json", "r") as f:
JsonLinePusher(arabesque_importer, f).run()
+
def test_arabesque_importer(arabesque_importer):
last_index = arabesque_importer.api.get_changelog(limit=1)[0].index
arabesque_importer.bezerk_mode = True
- counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 490
+ counts = SqlitePusher(
+ arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+ ).run()
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 490
# fetch most recent editgroup
- change = arabesque_importer.api.get_changelog_entry(index=last_index+1)
+ change = arabesque_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "identifier/url seedlist" in eg.description.lower()
- assert eg.extra['git_rev']
- assert eg.extra['crawl_id'] == "DUMMY123"
- assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert eg.extra["crawl_id"] == "DUMMY123"
+ assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra["agent"]
# re-insert; should skip
arabesque_importer.reset()
arabesque_importer.bezerk_mode = False
- counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 490
+ counts = SqlitePusher(
+ arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+ ).run()
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 490
+
def test_arabesque_dict_parse(arabesque_importer):
- with open('tests/files/arabesque_example.json', 'r') as f:
+ with open("tests/files/arabesque_example.json", "r") as f:
raw = json.loads(f.readline())
f = arabesque_importer.parse_record(raw)
assert f.sha1 == "bdd78be55800bb1c9a5e47005bac5e4124793c7b"
diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py
index 9368fb06..2a297c00 100644
--- a/python/tests/import_arxiv.py
+++ b/python/tests/import_arxiv.py
@@ -1,4 +1,3 @@
-
import pytest
from bs4 import BeautifulSoup
from fixtures import *
@@ -12,35 +11,37 @@ def arxiv_importer(api):
ari._test_override = True
return ari
+
def test_arxiv_importer(arxiv_importer):
last_index = arxiv_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
arxiv_importer.bezerk_mode = True
counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run()
- assert counts['insert'] == 2
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 2
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = arxiv_importer.api.get_changelog_entry(index=last_index+1)
+ change = arxiv_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "arxiv" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.ArxivRawImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.ArxivRawImporter" in eg.extra["agent"]
last_index = arxiv_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
arxiv_importer.bezerk_mode = False
arxiv_importer.reset()
counts = Bs4XmlFilePusher(arxiv_importer, f, "record").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 2
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 2
+ assert counts["skip"] == 0
assert last_index == arxiv_importer.api.get_changelog(limit=1)[0].index
+
def test_arxiv_xml_parse(arxiv_importer):
- with open('tests/files/arxivraw_1810.09584.xml', 'r') as f:
+ with open("tests/files/arxivraw_1810.09584.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r = arxiv_importer.parse_record(soup.find_all("record")[0])
@@ -67,11 +68,15 @@ def test_arxiv_xml_parse(arxiv_importer):
assert r2.release_year == 2019
assert str(r2.release_date) == "2019-01-13"
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
+ # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
assert len(r1.contribs) == 4
- assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an']
- assert r1.extra['arxiv']['base_id'] == '1810.09584'
- assert r1.extra['superceded'] is True
+ assert r1.extra["arxiv"]["categories"] == [
+ "cond-mat.stat-mech",
+ "physics.bio-ph",
+ "physics.data-an",
+ ]
+ assert r1.extra["arxiv"]["base_id"] == "1810.09584"
+ assert r1.extra["superceded"] is True
assert r1.contribs[0].raw_name == "Raphael Chetrite"
assert r1.contribs[0].role == "author"
@@ -90,11 +95,15 @@ def test_arxiv_xml_parse(arxiv_importer):
assert r1.abstracts == r2.abstracts
- assert r1.extra['arxiv']['comments'] == "7 pages, 2 figures"
- assert r1.extra['arxiv']['categories'] == ["cond-mat.stat-mech", "physics.bio-ph", "physics.data-an"]
+ assert r1.extra["arxiv"]["comments"] == "7 pages, 2 figures"
+ assert r1.extra["arxiv"]["categories"] == [
+ "cond-mat.stat-mech",
+ "physics.bio-ph",
+ "physics.data-an",
+ ]
- assert not r2.extra.get('superceded')
- r2.extra['superceded'] = True
+ assert not r2.extra.get("superceded")
+ r2.extra["superceded"] = True
assert r1.extra == r2.extra
assert not r1.refs
diff --git a/python/tests/import_common.py b/python/tests/import_common.py
index 2b7c935a..cd89f914 100644
--- a/python/tests/import_common.py
+++ b/python/tests/import_common.py
@@ -1,4 +1,3 @@
-
import datetime
import json
from typing import Any
@@ -17,15 +16,16 @@ from fatcat_tools.transforms import entity_to_dict
@pytest.fixture(scope="function")
def entity_importer(api, mocker) -> Any:
es_client = elasticsearch.Elasticsearch("mockbackend")
- mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
yield EntityImporter(api, es_client=es_client)
+
def test_fuzzy_match_none(entity_importer, mocker) -> None:
"""
Simple ES-mocked test for "no search results" case
"""
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
(200, {}, json.dumps(ES_RELEASE_EMPTY_RESP)),
@@ -39,6 +39,7 @@ def test_fuzzy_match_none(entity_importer, mocker) -> None:
resp = entity_importer.match_existing_release_fuzzy(release)
assert resp is None
+
def test_fuzzy_match_different(entity_importer, mocker) -> None:
"""
Simple fuzzycat-mocked test for "strong match" case
@@ -60,7 +61,7 @@ def test_fuzzy_match_different(entity_importer, mocker) -> None:
ext_ids=ReleaseExtIds(),
)
- match_raw = mocker.patch('fatcat_tools.importers.common.match_release_fuzzy')
+ match_raw = mocker.patch("fatcat_tools.importers.common.match_release_fuzzy")
match_raw.side_effect = [[r3, r2, r3, r2]]
resp = entity_importer.match_existing_release_fuzzy(r1)
assert (resp[0], resp[2]) == ("STRONG", r2)
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index a8a1ec44..eb931eb1 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -1,4 +1,3 @@
-
import gzip
import json
@@ -10,67 +9,79 @@ from fatcat_tools.importers import CrossrefImporter, JsonLinePusher
@pytest.fixture(scope="function")
def crossref_importer(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield CrossrefImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
+ )
+
@pytest.fixture(scope="function")
def crossref_importer_existing(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield CrossrefImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
+ )
+
-@pytest.mark.skip(reason="slow/huge crossref import is a corner-case and slows tests significantly")
+@pytest.mark.skip(
+ reason="slow/huge crossref import is a corner-case and slows tests significantly"
+)
def test_crossref_importer_huge(crossref_importer):
last_index = crossref_importer.api.get_changelog(limit=1)[0].index
- with gzip.open('tests/files/huge_crossref_doi.json.gz', 'rt') as f:
+ with gzip.open("tests/files/huge_crossref_doi.json.gz", "rt") as f:
crossref_importer.bezerk_mode = True
line = f.readline()
mega_blob = [line for i in range(95)]
counts = JsonLinePusher(crossref_importer, mega_blob).run()
- assert counts['insert'] == 95
- change = crossref_importer.api.get_changelog_entry(index=last_index+1)
+ assert counts["insert"] == 95
+ change = crossref_importer.api.get_changelog_entry(index=last_index + 1)
release = crossref_importer.api.get_release(change.editgroup.edits.releases[0].ident)
assert len(release.contribs) == 1014
+
def test_crossref_importer(crossref_importer):
last_index = crossref_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+ with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f:
crossref_importer.bezerk_mode = True
counts = JsonLinePusher(crossref_importer, f).run()
- assert counts['insert'] == 14
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 14
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = crossref_importer.api.get_changelog_entry(index=last_index+1)
+ change = crossref_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "crossref" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.CrossrefImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.CrossrefImporter" in eg.extra["agent"]
last_index = crossref_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+ with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f:
crossref_importer.bezerk_mode = False
crossref_importer.reset()
counts = JsonLinePusher(crossref_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 14
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 14
+ assert counts["skip"] == 0
assert last_index == crossref_importer.api.get_changelog(limit=1)[0].index
+
def test_crossref_mappings(crossref_importer):
- assert crossref_importer.map_release_type('journal-article') == "article-journal"
- assert crossref_importer.map_release_type('asdf') is None
- assert crossref_importer.map_release_type('book-series') is None
- assert crossref_importer.map_release_type('standard') == 'standard'
+ assert crossref_importer.map_release_type("journal-article") == "article-journal"
+ assert crossref_importer.map_release_type("asdf") is None
+ assert crossref_importer.map_release_type("book-series") is None
+ assert crossref_importer.map_release_type("standard") == "standard"
+
def test_crossref_importer_create(crossref_importer):
crossref_importer.create_containers = True
- with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+ with open("tests/files/crossref-works.2018-01-21.badsample.json", "r") as f:
JsonLinePusher(crossref_importer, f).run()
+
def test_crossref_dict_parse(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
@@ -78,7 +89,10 @@ def test_crossref_dict_parse(crossref_importer):
JsonLinePusher(crossref_importer, [json.dumps(raw)]).run()
print(r.extra)
- assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
+ assert (
+ r.title
+ == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
+ )
assert r.publisher == "Wiley-Blackwell"
assert r.release_type == "article-journal"
assert r.release_stage == "published"
@@ -88,22 +102,22 @@ def test_crossref_dict_parse(crossref_importer):
assert r.ext_ids.isbn13 == "978-3-16-148410-0"
assert r.language == "fr"
assert r.subtitle is None
- assert 'subtitle' not in r.extra
- assert 'subtitle' not in r.extra['crossref']
- assert 'funder' not in r.extra
- assert 'funder' not in r.extra['crossref']
+ assert "subtitle" not in r.extra
+ assert "subtitle" not in r.extra["crossref"]
+ assert "funder" not in r.extra
+ assert "funder" not in r.extra["crossref"]
# matched by ISSN, so shouldn't be in there
- #assert extra['container_name'] == "International Journal of Quantum Chemistry"
- assert r.extra['aliases'] == ["some other title"]
- assert r.extra['crossref']['archive'] == ['Portico', 'LOCKSS']
+ # assert extra['container_name'] == "International Journal of Quantum Chemistry"
+ assert r.extra["aliases"] == ["some other title"]
+ assert r.extra["crossref"]["archive"] == ["Portico", "LOCKSS"]
assert len(r.contribs) == 6
assert r.contribs[0].raw_name == "Marcelo D. Radicioni"
assert r.contribs[0].given_name == "Marcelo D."
assert r.contribs[0].surname == "Radicioni"
assert r.contribs[0].index == 0
- assert r.contribs[0].extra['seq'] == "first"
+ assert r.contribs[0].extra["seq"] == "first"
assert r.contribs[1].raw_affiliation == "Some University"
- assert r.contribs[1].extra['more_affiliations'] == ["Some Department"]
+ assert r.contribs[1].extra["more_affiliations"] == ["Some Department"]
assert r.contribs[1].role == "author"
assert r.contribs[4].role == "editor"
assert r.contribs[4].index is None
@@ -115,32 +129,47 @@ def test_crossref_dict_parse(crossref_importer):
assert r.refs[0].year == 1972
assert r.refs[0].locator == "1734"
assert r.refs[0].container_name == "J. Chem. Phys."
- assert r.refs[0].extra == {"volume": "57", "authors": ["Swenson"], "doi": "10.1063/1.1678462", "medium": "DVD"}
- assert r.refs[2].key == 'BIB3'
- assert r.refs[2].extra.get('author') is None
- assert r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3"
- assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry"
+ assert r.refs[0].extra == {
+ "volume": "57",
+ "authors": ["Swenson"],
+ "doi": "10.1063/1.1678462",
+ "medium": "DVD",
+ }
+ assert r.refs[2].key == "BIB3"
+ assert r.refs[2].extra.get("author") is None
+ assert (
+ r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3"
+ )
+ assert (
+ r.refs[3].container_name
+ == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry"
+ )
+
def test_crossref_subtitle(crossref_importer):
"""
Tests new subtitle field, explicitly
"""
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
- raw['subtitle'] = ["some bogus subtitle", "blah"]
+ raw["subtitle"] = ["some bogus subtitle", "blah"]
r = crossref_importer.parse_record(raw)
# ensure the API server is ok with format
JsonLinePusher(crossref_importer, [json.dumps(raw)]).run()
print(r.extra)
- assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
+ assert (
+ r.title
+ == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
+ )
assert r.subtitle == "some bogus subtitle"
- assert 'subtitle' not in r.extra
- assert 'subtitle' not in r.extra['crossref']
+ assert "subtitle" not in r.extra
+ assert "subtitle" not in r.extra["crossref"]
+
def test_stateful_checking(crossref_importer_existing):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line, a whole document
raw = f.read()
# might not exist yet...
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index e1c79bc4..220dc0f6 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -54,9 +54,7 @@ def test_datacite_importer_huge(datacite_importer):
counts = JsonLinePusher(datacite_importer, f).run()
assert counts["insert"] == 998
change = datacite_importer.api.get_changelog_entry(index=last_index + 1)
- release = datacite_importer.api.get_release(
- change.editgroup.edits.releases[0].ident
- )
+ release = datacite_importer.api.get_release(change.editgroup.edits.releases[0].ident)
assert len(release.contribs) == 3
@@ -76,17 +74,26 @@ def test_find_original_language_title():
),
Case(
"empty subdict is ignored",
- {"title": "Noise Reduction", "original_language_title": {},},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": {},
+ },
None,
),
Case(
"unknown subdict keys are ignored",
- {"title": "Noise Reduction", "original_language_title": {"broken": "kv"},},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": {"broken": "kv"},
+ },
None,
),
Case(
"original string",
- {"title": "Noise Reduction", "original_language_title": "Подавление шума",},
+ {
+ "title": "Noise Reduction",
+ "original_language_title": "Подавление шума",
+ },
"Подавление шума",
),
Case(
@@ -163,7 +170,10 @@ def test_parse_datacite_titles():
),
Case(
"multiple titles, first wins",
- [{"title": "Total carbon dioxide"}, {"title": "Meeting Heterogeneity"},],
+ [
+ {"title": "Total carbon dioxide"},
+ {"title": "Meeting Heterogeneity"},
+ ],
("Total carbon dioxide", None, None),
),
Case(
@@ -201,7 +211,9 @@ def test_parse_datacite_titles():
[
{
"title": "Total carbon dioxide",
- "original_language_title": {"__content__": "Total carbon dioxide",},
+ "original_language_title": {
+ "__content__": "Total carbon dioxide",
+ },
},
{"title": "Station TT043_7-9", "titleType": "Subtitle"},
],
@@ -239,9 +251,7 @@ def test_parse_datacite_dates():
Case("year only yields year only", [{"date": "2019"}], (None, None, 2019)),
Case("int year", [{"date": 2019}], (None, None, 2019)),
Case("first wins", [{"date": "2019"}, {"date": "2020"}], (None, None, 2019)),
- Case(
- "skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020)
- ),
+ Case("skip bogus year", [{"date": "abc"}, {"date": "2020"}], (None, None, 2020)),
Case(
"first with type",
[{"date": "2019", "dateType": "Accepted"}, {"date": "2020"}],
@@ -249,7 +259,9 @@ def test_parse_datacite_dates():
),
Case(
"full date",
- [{"date": "2019-12-01", "dateType": "Valid"},],
+ [
+ {"date": "2019-12-01", "dateType": "Valid"},
+ ],
(datetime.date(2019, 12, 1), 12, 2019),
),
Case(
@@ -294,22 +306,30 @@ def test_parse_datacite_dates():
),
Case(
"fuzzy year only",
- [{"date": "Year 2010", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010", "dateType": "Issued"},
+ ],
(None, None, 2010),
),
Case(
"fuzzy year and month",
- [{"date": "Year 2010 Feb", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010 Feb", "dateType": "Issued"},
+ ],
(None, 2, 2010),
),
Case(
"fuzzy year, month, day",
- [{"date": "Year 2010 Feb 24", "dateType": "Issued"},],
+ [
+ {"date": "Year 2010 Feb 24", "dateType": "Issued"},
+ ],
(datetime.date(2010, 2, 24), 2, 2010),
),
Case(
"ignore broken date",
- [{"date": "Febrrr 45", "dateType": "Updated"},],
+ [
+ {"date": "Febrrr 45", "dateType": "Updated"},
+ ],
(None, None, None),
),
]
@@ -317,13 +337,19 @@ def test_parse_datacite_dates():
result = parse_datacite_dates(case.input)
assert result == case.result, case.about
+
def test_datacite_spammy_title(datacite_importer):
- r = datacite_importer.parse_record({"title": """HD! My Hero academia
+ r = datacite_importer.parse_record(
+ {
+ "title": """HD! My Hero academia
Heroes: Rising [2020]Full Movie Watch
Online And Free Download""",
- "attributes": {"doi": "10.1234/1234"}})
+ "attributes": {"doi": "10.1234/1234"},
+ }
+ )
assert r is False
+
def test_datacite_importer(datacite_importer):
last_index = datacite_importer.api.get_changelog(limit=1)[0].index
with open("tests/files/datacite_sample.jsonl", "r") as f:
@@ -361,9 +387,7 @@ def test_datacite_dict_parse(datacite_importer):
print(r.extra)
assert r.title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"
- assert (
- r.publisher == "International Centre for Agricultural Research in Dry Areas"
- )
+ assert r.publisher == "International Centre for Agricultural Research in Dry Areas"
assert r.release_type == "article"
assert r.release_stage == "published"
assert r.license_slug is None
@@ -424,9 +448,7 @@ def test_index_form_to_display_name():
),
Case("Solomon, P. M.", "P. M. Solomon"),
Case("Sujeevan Ratnasingham", "Sujeevan Ratnasingham"),
- Case(
- "Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler"
- ),
+ Case("Paul Stöckli (1906-1991), Künstler", "Paul Stöckli (1906-1991), Künstler"),
]
for c in cases:
@@ -450,9 +472,7 @@ def test_lookup_license_slug():
"https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess.xhtml",
"ADS-UK",
),
- Case(
- "https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK"
- ),
+ Case("https://archaeologydataservice.ac.uk/advice/termsOfUseAndAccess", "ADS-UK"),
Case("https://creativecommons.org/public-domain/cc0", "CC-0"),
Case("https://creativecommons.org/publicdomain/zero/1.0", "CC-0"),
Case("https://creativecommons.org/share-your-work/public-domain/cc0", "CC-0"),
diff --git a/python/tests/import_dblp.py b/python/tests/import_dblp.py
index 4fb178b8..399523e3 100644
--- a/python/tests/import_dblp.py
+++ b/python/tests/import_dblp.py
@@ -1,4 +1,3 @@
-
import io
import pytest
@@ -15,39 +14,45 @@ from fatcat_tools.importers import (
@pytest.fixture(scope="function")
def dblp_importer(api):
- with open('tests/files/dblp_container_map.tsv', 'r') as tsv_file:
+ with open("tests/files/dblp_container_map.tsv", "r") as tsv_file:
yield DblpReleaseImporter(api, tsv_file, bezerk_mode=True)
+
@pytest.fixture(scope="function")
def dblp_container_importer(api):
- with open('tests/files/dblp_container_map.tsv', 'r') as tsv_file:
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield DblpContainerImporter(api, issn_file, tsv_file, io.StringIO(), bezerk_mode=True)
+ with open("tests/files/dblp_container_map.tsv", "r") as tsv_file:
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield DblpContainerImporter(
+ api, issn_file, tsv_file, io.StringIO(), bezerk_mode=True
+ )
+
def test_dblp_importer(dblp_importer):
last_index = dblp_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_dblp.xml', 'rb') as f:
+ with open("tests/files/example_dblp.xml", "rb") as f:
dblp_importer.bezerk_mode = True
- counts = Bs4XmlLargeFilePusher(dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True).run()
- #print(counts)
- assert counts['insert'] == 3
- assert counts['exists'] == 0
- assert counts['skip'] == 1
+ counts = Bs4XmlLargeFilePusher(
+ dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True
+ ).run()
+ # print(counts)
+ assert counts["insert"] == 3
+ assert counts["exists"] == 0
+ assert counts["skip"] == 1
# fetch most recent editgroup
- change = dblp_importer.api.get_changelog_entry(index=last_index+1)
+ change = dblp_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "dblp" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.DblpReleaseImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.DblpReleaseImporter" in eg.extra["agent"]
# check that entity name mangling was fixed on import
eg = dblp_importer.api.get_editgroup(eg.editgroup_id)
release = dblp_importer.api.get_release(eg.edits.releases[0].ident)
for r_edit in eg.edits.releases:
release = dblp_importer.api.get_release(r_edit.ident)
- #print(release.ext_ids.dblp)
+ # print(release.ext_ids.dblp)
if release.ext_ids.dblp == "conf/er/Norrie08":
break
assert release.ext_ids.dblp == "conf/er/Norrie08"
@@ -55,34 +60,37 @@ def test_dblp_importer(dblp_importer):
assert release.contribs[1].raw_name == "Michael H. Böhlen"
last_index = dblp_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_dblp.xml', 'rb') as f:
+ with open("tests/files/example_dblp.xml", "rb") as f:
dblp_importer.bezerk_mode = False
dblp_importer.reset()
- counts = Bs4XmlLargeFilePusher(dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True).run()
- #print(counts)
- assert counts['insert'] == 0
- assert counts['exists'] == 3
- assert counts['skip'] == 1
+ counts = Bs4XmlLargeFilePusher(
+ dblp_importer, f, dblp_importer.ELEMENT_TYPES, use_lxml=True
+ ).run()
+ # print(counts)
+ assert counts["insert"] == 0
+ assert counts["exists"] == 3
+ assert counts["skip"] == 1
assert last_index == dblp_importer.api.get_changelog(limit=1)[0].index
+
def test_dblp_container_importer(dblp_container_importer):
last_index = dblp_container_importer.api.get_changelog(limit=1)[0].index
output_tsv_map = io.StringIO()
- with open('tests/files/example_dblp_containers.json', 'r') as f:
+ with open("tests/files/example_dblp_containers.json", "r") as f:
dblp_container_importer.bezerk_mode = True
dblp_container_importer.dblp_container_map_output = output_tsv_map
counts = JsonLinePusher(dblp_container_importer, f).run()
- assert counts['insert'] == 10
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 10
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = dblp_container_importer.api.get_changelog_entry(index=last_index+1)
+ change = dblp_container_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "dblp" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.DblpContainerImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.DblpContainerImporter" in eg.extra["agent"]
# check that entity name mangling was fixed on import
eg = dblp_container_importer.api.get_editgroup(eg.editgroup_id)
@@ -93,34 +101,35 @@ def test_dblp_container_importer(dblp_container_importer):
assert container.name == "Atlantis Thinking Machines"
assert container.issnl == "1877-3273"
assert container.container_type == "book-series"
- assert container.extra['dblp']['prefix'] == "series/atlantis"
- assert container.extra['urls'] == ["http://link.springer.com/bookseries/10077"]
+ assert container.extra["dblp"]["prefix"] == "series/atlantis"
+ assert container.extra["urls"] == ["http://link.springer.com/bookseries/10077"]
last_index = dblp_container_importer.api.get_changelog(limit=1)[0].index
output_tsv_map.seek(0)
- #print(output_tsv_map.read())
- #output_tsv_map.seek(0)
- with open('tests/files/example_dblp_containers.json', 'r') as f:
+ # print(output_tsv_map.read())
+ # output_tsv_map.seek(0)
+ with open("tests/files/example_dblp_containers.json", "r") as f:
dblp_container_importer.reset()
dblp_container_importer.bezerk_mode = False
dblp_container_importer.dblp_container_map_output = io.StringIO()
dblp_container_importer.read_dblp_container_map_file(output_tsv_map)
counts = JsonLinePusher(dblp_container_importer, f).run()
print(counts)
- assert counts['insert'] == 0
- assert counts['exists'] == 10
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 10
+ assert counts["skip"] == 0
assert last_index == dblp_container_importer.api.get_changelog(limit=1)[0].index
+
def test_dblp_xml_parse(dblp_importer):
- with open('tests/files/example_dblp_article.xml', 'r') as f:
+ with open("tests/files/example_dblp_article.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r1 = dblp_importer.parse_record(soup.find_all("article")[0])
assert r1.title == "Jim Gray, astronomer"
assert r1.contribs[0].raw_name == "Alexander S. Szalay"
# tested above, in LXML import path
- #assert r1.contribs[1].raw_name == "Michael H. Bohlen"
+ # assert r1.contribs[1].raw_name == "Michael H. Bohlen"
assert r1.contribs[2].raw_name == "Nicolas Heist"
# XXX: assert r1.contribs[2].extra['orcid'] == "0000-0002-4354-9138"
assert r1.contribs[3].raw_name == "Jens Lehmann"
@@ -130,5 +139,5 @@ def test_dblp_xml_parse(dblp_importer):
assert r1.issue == "11"
assert r1.volume == "51"
assert r1.release_year == 2008
- #assert r1.extra['container_name'] == "Commun. ACM"
- assert r1.extra['dblp']['type'] == "article"
+ # assert r1.extra['container_name'] == "Commun. ACM"
+ assert r1.extra["dblp"]["type"] == "article"
diff --git a/python/tests/import_doaj.py b/python/tests/import_doaj.py
index 4cd3b518..9c4ba552 100644
--- a/python/tests/import_doaj.py
+++ b/python/tests/import_doaj.py
@@ -1,4 +1,3 @@
-
import datetime
import json
@@ -14,7 +13,7 @@ from fatcat_tools.transforms import entity_to_dict
@pytest.fixture(scope="function")
def doaj_importer(api, mocker):
es_client = elasticsearch.Elasticsearch("mockbackend")
- mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
yield DoajArticleImporter(
api,
@@ -23,6 +22,7 @@ def doaj_importer(api, mocker):
es_client=es_client,
)
+
def test_doaj_importer(doaj_importer):
last_index = doaj_importer.api.get_changelog(limit=1)[0].index
with open("tests/files/example_doaj_articles.json", "r") as f:
@@ -60,6 +60,7 @@ def test_doaj_importer(doaj_importer):
doaj_importer.api.delete_release(eg.editgroup_id, release_edit.ident)
doaj_importer.api.accept_editgroup(eg.editgroup_id)
+
def test_doaj_importer_existing_doi(doaj_importer):
"""
One of the DOAJ test entities has a dummy DOI (10.123/abc); this test
@@ -118,12 +119,16 @@ def test_doaj_importer_existing_doi(doaj_importer):
)
doaj_importer.api.accept_editgroup(eg.editgroup_id)
+
def test_doaj_dict_parse(doaj_importer):
with open("tests/files/example_doaj_articles.json", "r") as f:
raw = json.loads(f.readline())
r = doaj_importer.parse_record(raw)
- assert r.title == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel"
+ assert (
+ r.title
+ == "Effect of hydrogen on tensile properties and fracture behavior of PH 13-8 Mo steel"
+ )
assert r.publisher == "Elsevier"
assert r.release_type == "article-journal"
assert r.release_stage == "published"
@@ -140,7 +145,7 @@ def test_doaj_dict_parse(doaj_importer):
assert r.version is None
assert r.language == "en"
# matched by ISSN, so wouldn't be defined normally
- assert r.extra['container_name'] == "Materials & Design"
+ assert r.extra["container_name"] == "Materials & Design"
assert len(r.abstracts) == 1
assert len(r.abstracts[0].content) == 1033
assert len(r.contribs) == 5
@@ -149,6 +154,6 @@ def test_doaj_dict_parse(doaj_importer):
assert r.contribs[0].surname is None
assert not r.refs
- #print(r.extra)
- assert r.extra['release_month'] == 10
- assert r.extra['country'] == 'gb'
+ # print(r.extra)
+ assert r.extra["release_month"] == 10
+ assert r.extra["country"] == "gb"
diff --git a/python/tests/import_file_generic.py b/python/tests/import_file_generic.py
index e1d987e8..9eeeb2bb 100644
--- a/python/tests/import_file_generic.py
+++ b/python/tests/import_file_generic.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client import *
@@ -23,77 +22,116 @@ def test_file_update_generic():
url_sets = [
# dummy
{
- 'before': [],
- 'after': [],
+ "before": [],
+ "after": [],
},
# social => academicsocial
{
- 'before': [
+ "before": [
FileUrl(url="https://academic.edu/blah.pdf", rel="social"),
],
- 'after': [
+ "after": [
FileUrl(url="https://academic.edu/blah.pdf", rel="academicsocial"),
],
},
# archive.org repository => archive
{
- 'before': [
+ "before": [
FileUrl(url="https://archive.org/download/item/blah.pdf", rel="repository"),
],
- 'after': [
+ "after": [
FileUrl(url="https://archive.org/download/item/blah.pdf", rel="archive"),
],
},
# :80 in URL is redundant
{
- 'before': [
- FileUrl(url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"),
- FileUrl(url="http://homepages.math.uic.edu:80/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"),
+ "before": [
+ FileUrl(
+ url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf",
+ rel="web",
+ ),
+ FileUrl(
+ url="http://homepages.math.uic.edu:80/~rosendal/PapersWebsite/BanachMinimalExamples.pdf",
+ rel="web",
+ ),
FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"),
FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"),
],
- 'after': [
- FileUrl(url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf", rel="web"),
+ "after": [
+ FileUrl(
+ url="http://homepages.math.uic.edu/~rosendal/PapersWebsite/BanachMinimalExamples.pdf",
+ rel="web",
+ ),
FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"),
],
},
{
- 'before': [
+ "before": [
FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"),
],
- 'after': [
+ "after": [
FileUrl(url="http://mit.edu:80/item/blah.pdf", rel="web"),
],
},
# http/https redundant
{
- 'before': [
- FileUrl(url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"),
- FileUrl(url="http://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"),
+ "before": [
+ FileUrl(
+ url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf",
+ rel="web",
+ ),
+ FileUrl(
+ url="http://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf",
+ rel="web",
+ ),
FileUrl(url="https://mit.edu/item/blah.pdf", rel="web"),
- FileUrl(url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", rel="webarchive"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf",
+ rel="webarchive",
+ ),
FileUrl(url="http://mit.edu/item/blah.pdf", rel="web"),
- FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/something.com/blah.pdf",
+ rel="webarchive",
+ ),
],
- 'after': [
- FileUrl(url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf", rel="web"),
+ "after": [
+ FileUrl(
+ url="https://eo1.gsfc.nasa.gov/new/validationReport/Technology/JoeCD/asner_etal_PNAS_20041.pdf",
+ rel="web",
+ ),
FileUrl(url="https://mit.edu/item/blah.pdf", rel="web"),
- FileUrl(url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf", rel="webarchive"),
- FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/http://mit.edu/item/blah.pdf",
+ rel="webarchive",
+ ),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/something.com/blah.pdf",
+ rel="webarchive",
+ ),
],
},
# short /2017/ wayback datetime
{
- 'before': [
- FileUrl(url="https://web.archive.org/web/2017/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"),
- FileUrl(url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"),
+ "before": [
+ FileUrl(
+ url="https://web.archive.org/web/2017/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38",
+ rel="webarchive",
+ ),
+ FileUrl(
+ url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38",
+ rel="webarchive",
+ ),
],
- 'after': [
- FileUrl(url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38", rel="webarchive"),
+ "after": [
+ FileUrl(
+ url="https://web.archive.org/web/20170922010835/http://www.geoamazonia.net/index.php/revista/article/download/51/pdf_38",
+ rel="webarchive",
+ ),
],
},
]
for pair in url_sets:
- f1.urls = pair['before']
- assert EntityImporter.generic_file_cleanups(f1).urls == pair['after']
+ f1.urls = pair["before"]
+ assert EntityImporter.generic_file_cleanups(f1).urls == pair["after"]
diff --git a/python/tests/import_file_meta.py b/python/tests/import_file_meta.py
index 949a1075..2555f351 100644
--- a/python/tests/import_file_meta.py
+++ b/python/tests/import_file_meta.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -12,35 +11,42 @@ from fatcat_tools.importers import FileMetaImporter, JsonLinePusher
def file_meta_importer(api):
yield FileMetaImporter(api)
+
def test_file_meta_importer_basic(file_meta_importer):
# insert two file entities
api = file_meta_importer.api
eg = quick_eg(file_meta_importer.api)
# with full metadata
- f1edit = api.create_file(eg.editgroup_id, FileEntity(
- size=372121,
- md5="e1fd97475c8aa102568f5d70a1bd0c07",
- sha1="0000045687dad717ed6512e395b04ec9c00995b7",
- sha256="51bdc9e40cc175089fcb60b0b188e6cbcdcddb1ff8acbe6b039b8f8fff0afff0",
- mimetype="application/pdf",
- ))
+ f1edit = api.create_file(
+ eg.editgroup_id,
+ FileEntity(
+ size=372121,
+ md5="e1fd97475c8aa102568f5d70a1bd0c07",
+ sha1="0000045687dad717ed6512e395b04ec9c00995b7",
+ sha256="51bdc9e40cc175089fcb60b0b188e6cbcdcddb1ff8acbe6b039b8f8fff0afff0",
+ mimetype="application/pdf",
+ ),
+ )
# partial/stub metadata
- f2edit = api.create_file(eg.editgroup_id, FileEntity(
- sha1="00000376ad49f56145721503f1eb5e6e49e779fd",
- mimetype="application/pdf",
- ))
+ f2edit = api.create_file(
+ eg.editgroup_id,
+ FileEntity(
+ sha1="00000376ad49f56145721503f1eb5e6e49e779fd",
+ mimetype="application/pdf",
+ ),
+ )
api.accept_editgroup(eg.editgroup_id)
- with open('tests/files/example_file_meta.json', 'r') as f:
+ with open("tests/files/example_file_meta.json", "r") as f:
counts = JsonLinePusher(file_meta_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 0
- assert counts['update'] == 1
- assert counts['skip-no-match'] == 4
- assert counts['skip-missing-field'] == 1
- assert counts['skip-existing-complete'] == 1
+ assert counts["insert"] == 0
+ assert counts["exists"] == 0
+ assert counts["update"] == 1
+ assert counts["skip-no-match"] == 4
+ assert counts["skip-missing-field"] == 1
+ assert counts["skip-existing-complete"] == 1
# cleanup file entities
eg = quick_eg(file_meta_importer.api)
@@ -48,8 +54,9 @@ def test_file_meta_importer_basic(file_meta_importer):
api.delete_file(eg.editgroup_id, f2edit.ident)
api.accept_editgroup(eg.editgroup_id)
+
def test_file_meta_dict_parse(file_meta_importer):
- with open('tests/files/example_file_meta.json', 'r') as f:
+ with open("tests/files/example_file_meta.json", "r") as f:
raw = json.loads(f.readline())
f = file_meta_importer.parse_record(raw)
diff --git a/python/tests/import_fileset_generic.py b/python/tests/import_fileset_generic.py
index 8d30a83b..eadf5c43 100644
--- a/python/tests/import_fileset_generic.py
+++ b/python/tests/import_fileset_generic.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -11,48 +10,54 @@ from fatcat_tools.importers import FilesetImporter, JsonLinePusher
def fileset_importer(api):
yield FilesetImporter(api)
+
# TODO: use API to check that entities actually created...
def test_fileset_importer_basic(fileset_importer):
- with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f:
JsonLinePusher(fileset_importer, f).run()
+
def test_fileset_importer(fileset_importer):
last_index = fileset_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f:
fileset_importer.bezerk_mode = True
counts = JsonLinePusher(fileset_importer, f).run()
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = fileset_importer.api.get_changelog_entry(index=last_index+1)
+ change = fileset_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "generic fileset" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.FilesetImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.FilesetImporter" in eg.extra["agent"]
# re-insert; should skip
- with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f:
fileset_importer.reset()
fileset_importer.bezerk_mode = False
counts = JsonLinePusher(fileset_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 0
+
def test_fileset_dict_parse(fileset_importer):
- with open('tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json', 'r') as f:
+ with open("tests/files/fileset_ltjp7k2nrbes3or5h4na5qgxlu.json", "r") as f:
raw = json.loads(f.readline())
fs = fileset_importer.parse_record(raw)
assert fs.manifest[0].sha1 == "cc9bd558ca79b30b2966714da7ef4129537fde0c"
assert fs.manifest[0].md5 == "742c40404c9a4dbbd77c0985201c639f"
- assert fs.manifest[0].sha256 == "3a7c07ad17ce3638d5a1dd21f995a496e430b952eef00270ad741d506984370f"
+ assert (
+ fs.manifest[0].sha256
+ == "3a7c07ad17ce3638d5a1dd21f995a496e430b952eef00270ad741d506984370f"
+ )
assert fs.manifest[0].size == 640500
assert fs.manifest[0].path == "070111_LatA_100nM.txt"
- assert fs.manifest[0].extra['mimetype'] == "text/plain"
+ assert fs.manifest[0].extra["mimetype"] == "text/plain"
assert len(fs.urls) == 3
for u in fs.urls:
if u.rel == "repo":
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
index 862b310f..a06e7aa4 100644
--- a/python/tests/import_grobid_metadata.py
+++ b/python/tests/import_grobid_metadata.py
@@ -1,4 +1,3 @@
-
import base64
import json
import os
@@ -13,17 +12,21 @@ WARNING: these tests are currently very fragile because they have database
side-effects. Should probably be disabled or re-written.
"""
+
@pytest.fixture(scope="function")
def grobid_metadata_importer(api):
yield GrobidMetadataImporter(api)
def test_grobid_metadata_parse(grobid_metadata_importer):
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
- raw = json.loads(f.readline().split('\t')[4])
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
+ raw = json.loads(f.readline().split("\t")[4])
re = grobid_metadata_importer.parse_grobid_json(raw)
assert re
- assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+ assert (
+ re.title
+ == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+ )
assert len(re.contribs) == 5
print(re.contribs)
assert re.contribs[0].raw_name == "Wahyu Ary"
@@ -31,19 +34,21 @@ def test_grobid_metadata_parse(grobid_metadata_importer):
assert re.contribs[0].surname == "Ary"
assert re.publisher is None
if re.extra:
- assert re.extra.get('container_name') is None
+ assert re.extra.get("container_name") is None
assert len(re.refs) == 27
+
def test_file_metadata_parse(grobid_metadata_importer):
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
f.readline()
- raw = f.readline().split('\t')
+ raw = f.readline().split("\t")
# randomize sha1 so tests are repeatable
- random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
+ random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode("utf-8").upper())
fe = grobid_metadata_importer.parse_file_metadata(
- random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
+ random_sha1, json.loads(raw[1]), raw[2], int(raw[3])
+ )
assert fe
- #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
+ # assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
assert fe.md5 is None
assert fe.mimetype == "application/pdf"
assert fe.size == 142710
@@ -53,29 +58,30 @@ def test_file_metadata_parse(grobid_metadata_importer):
assert fe.urls[0].rel == "webarchive"
assert len(fe.release_ids) == 0
+
def test_grobid_metadata_importer(grobid_metadata_importer):
last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
grobid_metadata_importer.bezerk_mode = True
counts = LinePusher(grobid_metadata_importer, f).run()
- assert counts['insert'] == 10
- assert counts['inserted.release'] == 10
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 10
+ assert counts["inserted.release"] == 10
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1)
+ change = grobid_metadata_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "grobid" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.GrobidMetadataImporter" in eg.extra["agent"]
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
grobid_metadata_importer.reset()
grobid_metadata_importer.bezerk_mode = False
counts = LinePusher(grobid_metadata_importer, f).run()
- assert counts['insert'] == 0
- assert counts['inserted.release'] == 0
- assert counts['exists'] == 10
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["inserted.release"] == 0
+ assert counts["exists"] == 10
+ assert counts["skip"] == 0
diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py
index 04a8e7f6..d9e7d294 100644
--- a/python/tests/import_ingest.py
+++ b/python/tests/import_ingest.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -15,94 +14,100 @@ from fatcat_tools.importers import (
def ingest_importer(api):
yield IngestFileResultImporter(api)
+
@pytest.fixture(scope="function")
def ingest_web_importer(api):
yield IngestWebResultImporter(api)
+
# TODO: use API to check that entities actually created...
def test_ingest_importer_basic(ingest_importer):
- with open('tests/files/example_ingest.json', 'r') as f:
+ with open("tests/files/example_ingest.json", "r") as f:
JsonLinePusher(ingest_importer, f).run()
+
def test_ingest_importer(ingest_importer):
last_index = ingest_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_ingest.json', 'r') as f:
+ with open("tests/files/example_ingest.json", "r") as f:
ingest_importer.bezerk_mode = True
counts = JsonLinePusher(ingest_importer, f).run()
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 1
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 1
# fetch most recent editgroup
- change = ingest_importer.api.get_changelog_entry(index=last_index+1)
+ change = ingest_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "crawled from web" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.IngestFileResultImporter" in eg.extra["agent"]
# re-insert; should skip
- with open('tests/files/example_ingest.json', 'r') as f:
+ with open("tests/files/example_ingest.json", "r") as f:
ingest_importer.reset()
ingest_importer.bezerk_mode = False
counts = JsonLinePusher(ingest_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 1
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 1
+
def test_ingest_importer_xml(ingest_importer):
last_index = ingest_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_ingest_xml.json', 'r') as f:
+ with open("tests/files/example_ingest_xml.json", "r") as f:
ingest_importer.bezerk_mode = True
counts = JsonLinePusher(ingest_importer, f).run()
print(counts)
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = ingest_importer.api.get_changelog_entry(index=last_index+1)
+ change = ingest_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "crawled from web" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.IngestFileResultImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.IngestFileResultImporter" in eg.extra["agent"]
# re-import should skip
- with open('tests/files/example_ingest_xml.json', 'r') as f:
+ with open("tests/files/example_ingest_xml.json", "r") as f:
ingest_importer.reset()
ingest_importer.bezerk_mode = False
counts = JsonLinePusher(ingest_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 0
+
def test_ingest_importer_web(ingest_web_importer):
last_index = ingest_web_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_ingest_html.json', 'r') as f:
+ with open("tests/files/example_ingest_html.json", "r") as f:
ingest_web_importer.bezerk_mode = True
counts = JsonLinePusher(ingest_web_importer, f).run()
print(counts)
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = ingest_web_importer.api.get_changelog_entry(index=last_index+1)
+ change = ingest_web_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "crawled from web" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.IngestWebResultImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.IngestWebResultImporter" in eg.extra["agent"]
# re-import should skip
- with open('tests/files/example_ingest_html.json', 'r') as f:
+ with open("tests/files/example_ingest_html.json", "r") as f:
ingest_web_importer.reset()
ingest_web_importer.bezerk_mode = False
counts = JsonLinePusher(ingest_web_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 0
+
def test_ingest_importer_stage(ingest_importer, api):
"""
@@ -117,29 +122,30 @@ def test_ingest_importer_stage(ingest_importer, api):
dict(request_stage="published", release_stage="draft", status="skip-release-stage"),
]
ingest_importer.bezerk_mode = True
- with open('tests/files/example_ingest.json', 'r') as f:
+ with open("tests/files/example_ingest.json", "r") as f:
raw = json.loads(f.readline())
for row in test_table:
- #print(row)
+ # print(row)
# set dummy record stage
eg = quick_eg(api)
r1 = api.lookup_release(doi="10.123/abc")
- r1.release_stage = row['release_stage']
+ r1.release_stage = row["release_stage"]
api.update_release(eg.editgroup_id, r1.ident, r1)
api.accept_editgroup(eg.editgroup_id)
# set ingest request stage
- raw['request']['release_stage'] = row['request_stage']
+ raw["request"]["release_stage"] = row["request_stage"]
ingest_importer.reset()
ingest_importer.push_record(raw)
counts = ingest_importer.finish()
print(counts)
assert counts["total"] == 1
- assert counts[row['status']] == 1
+ assert counts[row["status"]] == 1
+
def test_ingest_dict_parse(ingest_importer):
- with open('tests/files/example_ingest.json', 'r') as f:
+ with open("tests/files/example_ingest.json", "r") as f:
raw = json.loads(f.readline())
f = ingest_importer.parse_record(raw)
assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
@@ -154,14 +160,15 @@ def test_ingest_dict_parse(ingest_importer):
assert u.url.startswith("https://web.archive.org/")
assert len(f.release_ids) == 1
+
def test_ingest_dict_parse_old(ingest_importer):
- with open('tests/files/example_ingest.old.json', 'r') as f:
+ with open("tests/files/example_ingest.old.json", "r") as f:
raw = json.loads(f.readline())
# ancient ingest requests had no type; skip them
f = ingest_importer.parse_record(raw)
assert f is None
- raw['request']['ingest_type'] = 'pdf'
+ raw["request"]["ingest_type"] = "pdf"
f = ingest_importer.parse_record(raw)
assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py
index c11e2307..4ebc87b4 100644
--- a/python/tests/import_jalc.py
+++ b/python/tests/import_jalc.py
@@ -1,4 +1,3 @@
-
import pytest
from bs4 import BeautifulSoup
from fixtures import *
@@ -8,70 +7,78 @@ from fatcat_tools.importers import Bs4XmlFilePusher, Bs4XmlLinesPusher, JalcImpo
@pytest.fixture(scope="function")
def jalc_importer(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield JalcImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield JalcImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
+ )
+
@pytest.fixture(scope="function")
def jalc_importer_existing(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield JalcImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield JalcImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
+ )
+
def test_jalc_importer(jalc_importer):
last_index = jalc_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jalc_lod_sample.xml', 'r') as f:
+ with open("tests/files/jalc_lod_sample.xml", "r") as f:
jalc_importer.bezerk_mode = True
counts = Bs4XmlFilePusher(jalc_importer, f, "Description").run()
- assert counts['insert'] == 2
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 2
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = jalc_importer.api.get_changelog_entry(index=last_index+1)
+ change = jalc_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "jalc" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.JalcImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.JalcImporter" in eg.extra["agent"]
last_index = jalc_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jalc_lod_sample.xml', 'r') as f:
+ with open("tests/files/jalc_lod_sample.xml", "r") as f:
jalc_importer.bezerk_mode = False
jalc_importer.reset()
counts = Bs4XmlFilePusher(jalc_importer, f, "Description").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 2
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 2
+ assert counts["skip"] == 0
assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index
+
def test_jalc_importer_lines(jalc_importer):
last_index = jalc_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f:
+ with open("tests/files/jalc_rdf_sample_100.xml", "r") as f:
jalc_importer.bezerk_mode = True
counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run()
- assert counts['insert'] == 93
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 93
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = jalc_importer.api.get_changelog_entry(index=last_index+1)
+ change = jalc_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "jalc" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.JalcImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.JalcImporter" in eg.extra["agent"]
last_index = jalc_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f:
+ with open("tests/files/jalc_rdf_sample_100.xml", "r") as f:
jalc_importer.bezerk_mode = False
jalc_importer.reset()
counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 93
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 93
+ assert counts["skip"] == 0
assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index
+
def test_jalc_xml_parse(jalc_importer):
- with open('tests/files/jalc_lod_sample.xml', 'r') as f:
+ with open("tests/files/jalc_lod_sample.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r = jalc_importer.parse_record(soup.find_all("Description")[0])
@@ -90,21 +97,21 @@ def test_jalc_xml_parse(jalc_importer):
assert r.pages == "898-902"
assert r.release_year == 1989
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "International Journal of Quantum Chemistry"
+ # assert extra['container_name'] == "International Journal of Quantum Chemistry"
assert len(r.contribs) == 4
assert r.contribs[0].raw_name == "Yasuhiko Hashimoto"
assert r.contribs[0].given_name == "Yasuhiko"
assert r.contribs[0].surname == "Hashimoto"
- assert r.contribs[0].extra['original_name']['raw_name'] == "橋本 雍彦"
- assert r.contribs[0].extra['original_name']['given_name'] == "雍彦"
- assert r.contribs[0].extra['original_name']['surname'] == "橋本"
+ assert r.contribs[0].extra["original_name"]["raw_name"] == "橋本 雍彦"
+ assert r.contribs[0].extra["original_name"]["given_name"] == "雍彦"
+ assert r.contribs[0].extra["original_name"]["surname"] == "橋本"
assert r.contribs[3].raw_name == "Teruo Takahashi"
assert r.contribs[3].given_name == "Teruo"
assert r.contribs[3].surname == "Takahashi"
- assert r.contribs[3].extra['original_name']['raw_name'] == "高橋 輝男"
- assert r.contribs[3].extra['original_name']['given_name'] == "輝男"
- assert r.contribs[3].extra['original_name']['surname'] == "高橋"
+ assert r.contribs[3].extra["original_name"]["raw_name"] == "高橋 輝男"
+ assert r.contribs[3].extra["original_name"]["given_name"] == "輝男"
+ assert r.contribs[3].extra["original_name"]["surname"] == "高橋"
assert not r.refs
diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py
index cdf4e8ed..ff9dcb7a 100644
--- a/python/tests/import_journal_metadata.py
+++ b/python/tests/import_journal_metadata.py
@@ -1,4 +1,3 @@
-
import pytest
from fixtures import *
@@ -9,32 +8,34 @@ from fatcat_tools.importers import JournalMetadataImporter, JsonLinePusher
def journal_metadata_importer(api):
yield JournalMetadataImporter(api)
+
# TODO: use API to check that entities actually created...
def test_journal_metadata_importer_batch(journal_metadata_importer):
- with open('tests/files/journal_metadata.sample.json', 'r') as f:
+ with open("tests/files/journal_metadata.sample.json", "r") as f:
JsonLinePusher(journal_metadata_importer, f).run()
+
def test_journal_metadata_importer(journal_metadata_importer):
last_index = journal_metadata_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/journal_metadata.sample.json', 'r') as f:
+ with open("tests/files/journal_metadata.sample.json", "r") as f:
journal_metadata_importer.bezerk_mode = True
counts = JsonLinePusher(journal_metadata_importer, f).run()
- assert counts['insert'] == 20
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 20
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = journal_metadata_importer.api.get_changelog_entry(index=last_index+1)
+ change = journal_metadata_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "container" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.JournalMetadataImporter" in eg.extra["agent"]
- with open('tests/files/journal_metadata.sample.json', 'r') as f:
+ with open("tests/files/journal_metadata.sample.json", "r") as f:
journal_metadata_importer.reset()
journal_metadata_importer.bezerk_mode = False
counts = JsonLinePusher(journal_metadata_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 20
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 20
+ assert counts["skip"] == 0
diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py
index 270da3ec..8ad550b3 100644
--- a/python/tests/import_jstor.py
+++ b/python/tests/import_jstor.py
@@ -1,4 +1,3 @@
-
import pytest
from bs4 import BeautifulSoup
from fixtures import *
@@ -8,48 +7,58 @@ from fatcat_tools.importers import Bs4XmlFilePusher, JstorImporter
@pytest.fixture(scope="function")
def jstor_importer(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield JstorImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield JstorImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=True
+ )
+
@pytest.fixture(scope="function")
def jstor_importer_existing(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield JstorImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield JstorImporter(
+ api, issn_file, extid_map_file="tests/files/example_map.sqlite3", bezerk_mode=False
+ )
+
def test_jstor_importer(jstor_importer):
last_index = jstor_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f:
+ with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f:
jstor_importer.bezerk_mode = True
counts = Bs4XmlFilePusher(jstor_importer, f, "article").run()
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = jstor_importer.api.get_changelog_entry(index=last_index+1)
+ change = jstor_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "jstor" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.JstorImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.JstorImporter" in eg.extra["agent"]
last_index = jstor_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f:
+ with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f:
jstor_importer.bezerk_mode = False
jstor_importer.reset()
counts = Bs4XmlFilePusher(jstor_importer, f, "article").run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 0
assert last_index == jstor_importer.api.get_changelog(limit=1)[0].index
+
def test_jstor_xml_parse(jstor_importer):
- with open('tests/files/jstor-article-10.2307_111039.xml', 'r') as f:
+ with open("tests/files/jstor-article-10.2307_111039.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r = jstor_importer.parse_record(soup.find_all("article")[0])
print(r.extra)
- assert r.title == "On the Universal Law of Attraction, Including that of Gravitation, as a Particular Case of Approximation Deducible from the Principle that Equal and Similar Particles of Matter Move Similarly, Relatively to Each other. [Abstract]"
+ assert (
+ r.title
+ == "On the Universal Law of Attraction, Including that of Gravitation, as a Particular Case of Approximation Deducible from the Principle that Equal and Similar Particles of Matter Move Similarly, Relatively to Each other. [Abstract]"
+ )
assert r.subtitle is None
assert r.original_title is None
assert r.publisher == "The Royal Society"
@@ -66,9 +75,9 @@ def test_jstor_xml_parse(jstor_importer):
assert r.release_date is None
assert r.release_year == 1843
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
+ # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
assert len(r.contribs) == 1
- assert r.extra['jstor']['journal_ids'] == ['abstpapecommroya', 'j100687']
+ assert r.extra["jstor"]["journal_ids"] == ["abstpapecommroya", "j100687"]
assert r.contribs[0].raw_name == "John Kinnersley Smythies"
assert r.contribs[0].given_name == "John Kinnersley"
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index aad8e799..4ade6e04 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -11,39 +10,42 @@ from fatcat_tools.importers import JsonLinePusher, MatchedImporter
def matched_importer(api):
yield MatchedImporter(api)
+
# TODO: use API to check that entities actually created...
def test_matched_importer_basic(matched_importer):
- with open('tests/files/example_matched.json', 'r') as f:
+ with open("tests/files/example_matched.json", "r") as f:
JsonLinePusher(matched_importer, f).run()
+
def test_matched_importer(matched_importer):
last_index = matched_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_matched.json', 'r') as f:
+ with open("tests/files/example_matched.json", "r") as f:
matched_importer.bezerk_mode = True
counts = JsonLinePusher(matched_importer, f).run()
- assert counts['insert'] == 2
- assert counts['exists'] == 0
- assert counts['skip'] == 11
+ assert counts["insert"] == 2
+ assert counts["exists"] == 0
+ assert counts["skip"] == 11
# fetch most recent editgroup
- change = matched_importer.api.get_changelog_entry(index=last_index+1)
+ change = matched_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "file-to-release" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.MatchedImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.MatchedImporter" in eg.extra["agent"]
# re-insert; should skip
- with open('tests/files/example_matched.json', 'r') as f:
+ with open("tests/files/example_matched.json", "r") as f:
matched_importer.reset()
matched_importer.bezerk_mode = False
counts = JsonLinePusher(matched_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 2
- assert counts['skip'] == 11
+ assert counts["insert"] == 0
+ assert counts["exists"] == 2
+ assert counts["skip"] == 11
+
def test_matched_dict_parse(matched_importer):
- with open('tests/files/example_matched.json', 'r') as f:
+ with open("tests/files/example_matched.json", "r") as f:
raw = json.loads(f.readline())
f = matched_importer.parse_record(raw)
assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py
index 88a4b871..9566719b 100644
--- a/python/tests/import_orcid.py
+++ b/python/tests/import_orcid.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -11,44 +10,48 @@ from fatcat_tools.importers import JsonLinePusher, OrcidImporter
def orcid_importer(api):
yield OrcidImporter(api)
+
def test_orcid_importer_badid(orcid_importer):
- with open('tests/files/0000-0001-8254-710X.json', 'r') as f:
+ with open("tests/files/0000-0001-8254-710X.json", "r") as f:
JsonLinePusher(orcid_importer, f).run()
+
# TODO: use API to check that entities actually created...
def test_orcid_importer(orcid_importer):
last_index = orcid_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+ with open("tests/files/0000-0001-8254-7103.json", "r") as f:
orcid_importer.bezerk_mode = True
counts = JsonLinePusher(orcid_importer, f).run()
- assert counts['insert'] == 1
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 1
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = orcid_importer.api.get_changelog_entry(index=last_index+1)
+ change = orcid_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "orcid" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.OrcidImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.OrcidImporter" in eg.extra["agent"]
- with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+ with open("tests/files/0000-0001-8254-7103.json", "r") as f:
orcid_importer.reset()
orcid_importer.bezerk_mode = False
counts = JsonLinePusher(orcid_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 1
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 1
+ assert counts["skip"] == 0
+
def test_orcid_importer_x(orcid_importer):
- with open('tests/files/0000-0003-3953-765X.json', 'r') as f:
+ with open("tests/files/0000-0003-3953-765X.json", "r") as f:
JsonLinePusher(orcid_importer, f).run()
c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X")
assert c is not None
+
def test_orcid_dict_parse(orcid_importer):
- with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+ with open("tests/files/0000-0001-8254-7103.json", "r") as f:
raw = json.loads(f.readline())
c = orcid_importer.parse_record(raw)
assert c.given_name == "Man-Hui"
diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py
index 371c59bb..a5301f29 100644
--- a/python/tests/import_pubmed.py
+++ b/python/tests/import_pubmed.py
@@ -1,4 +1,3 @@
-
import pytest
from bs4 import BeautifulSoup
from fixtures import *
@@ -8,43 +7,58 @@ from fatcat_tools.importers import Bs4XmlLargeFilePusher, PubmedImporter
@pytest.fixture(scope="function")
def pubmed_importer(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield PubmedImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True, lookup_refs=True)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield PubmedImporter(
+ api,
+ issn_file,
+ extid_map_file="tests/files/example_map.sqlite3",
+ bezerk_mode=True,
+ lookup_refs=True,
+ )
+
@pytest.fixture(scope="function")
def pubmed_importer_existing(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield PubmedImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False, lookup_refs=True)
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
+ yield PubmedImporter(
+ api,
+ issn_file,
+ extid_map_file="tests/files/example_map.sqlite3",
+ bezerk_mode=False,
+ lookup_refs=True,
+ )
+
def test_pubmed_importer(pubmed_importer):
last_index = pubmed_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/pubmedsample_2019.xml', 'r') as f:
+ with open("tests/files/pubmedsample_2019.xml", "r") as f:
pubmed_importer.bezerk_mode = True
counts = Bs4XmlLargeFilePusher(pubmed_importer, f, ["PubmedArticle"]).run()
- assert counts['insert'] == 176
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 176
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = pubmed_importer.api.get_changelog_entry(index=last_index+1)
+ change = pubmed_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "pubmed" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.PubmedImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.PubmedImporter" in eg.extra["agent"]
last_index = pubmed_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/pubmedsample_2019.xml', 'r') as f:
+ with open("tests/files/pubmedsample_2019.xml", "r") as f:
pubmed_importer.bezerk_mode = False
pubmed_importer.reset()
counts = Bs4XmlLargeFilePusher(pubmed_importer, f, ["PubmedArticle"]).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 176
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["exists"] == 176
+ assert counts["skip"] == 0
assert last_index == pubmed_importer.api.get_changelog(limit=1)[0].index
+
def test_pubmed_xml_parse(pubmed_importer):
- with open('tests/files/pubmedsample_2019.xml', 'r') as f:
+ with open("tests/files/pubmedsample_2019.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])
r2 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[-1])
@@ -62,10 +76,10 @@ def test_pubmed_xml_parse(pubmed_importer):
assert r1.volume == "3"
assert r1.issue == "1"
assert r1.pages == "69-81"
- assert r1.release_date is None # not "1976-12-03", which is medline ingest date
+ assert r1.release_date is None # not "1976-12-03", which is medline ingest date
assert r1.release_year == 1976
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
+ # assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"
assert len(r1.contribs) == 1
assert r1.contribs[0].raw_name == "F R Blume"
@@ -73,10 +87,13 @@ def test_pubmed_xml_parse(pubmed_importer):
assert r1.contribs[0].surname == "Blume"
print(r1.extra)
- assert r1.extra['pubmed']['pub_types'] == ['Journal Article']
+ assert r1.extra["pubmed"]["pub_types"] == ["Journal Article"]
assert not r1.refs
- assert r2.title == "Synthesis and Antibacterial Activity of Metal(loid) Nanostructures by Environmental Multi-Metal(loid) Resistant Bacteria and Metal(loid)-Reducing Flavoproteins"
+ assert (
+ r2.title
+ == "Synthesis and Antibacterial Activity of Metal(loid) Nanostructures by Environmental Multi-Metal(loid) Resistant Bacteria and Metal(loid)-Reducing Flavoproteins"
+ )
assert r2.subtitle is None
assert r2.original_title is None
assert r2.publisher is None
@@ -93,46 +110,56 @@ def test_pubmed_xml_parse(pubmed_importer):
assert str(r2.release_date) == "2018-05-15"
assert r2.release_year == 2018
# matched by ISSN, so shouldn't be in there?
- #assert extra['container_name'] == "Frontiers in microbiology"
+ # assert extra['container_name'] == "Frontiers in microbiology"
assert len(r2.contribs) > 3
assert r2.contribs[0].raw_name == "Maximiliano Figueroa"
assert r2.contribs[0].given_name == "Maximiliano"
assert r2.contribs[0].surname == "Figueroa"
- assert r2.contribs[0].raw_affiliation == "Laboratorio Microbiología Molecular, Departamento de Biología, Facultad de Química y Biología, Universidad de Santiago de Chile, Santiago, Chile."
+ assert (
+ r2.contribs[0].raw_affiliation
+ == "Laboratorio Microbiología Molecular, Departamento de Biología, Facultad de Química y Biología, Universidad de Santiago de Chile, Santiago, Chile."
+ )
assert r2.contribs[4].surname == "Muñoz-Villagrán"
assert r2.contribs[7].surname == "Latorre"
- assert r2.contribs[7].raw_affiliation == "Mathomics, Centro de Modelamiento Matemático, Universidad de Chile, Beauchef, Santiago, Chile."
- assert r2.contribs[7].extra['more_affiliations'] == [
+ assert (
+ r2.contribs[7].raw_affiliation
+ == "Mathomics, Centro de Modelamiento Matemático, Universidad de Chile, Beauchef, Santiago, Chile."
+ )
+ assert r2.contribs[7].extra["more_affiliations"] == [
"Fondap-Center of Genome Regulation, Facultad de Ciencias, Universidad de Chile, Santiago, Chile.",
"Laboratorio de Bioinformática y Expresión Génica, INTA, Universidad de Chile, Santiago, Chile.",
"Instituto de Ciencias de la Ingeniería, Universidad de O'Higgins, Rancagua, Chile.",
]
assert r2.contribs[-1].raw_name == "Felipe Arenas"
- assert r2.abstracts[0].content.startswith("Microbes are suitable candidates to recover and decontaminate different environments from soluble metal ions, either via reduction")
+ assert r2.abstracts[0].content.startswith(
+ "Microbes are suitable candidates to recover and decontaminate different environments from soluble metal ions, either via reduction"
+ )
assert r2.abstracts[0].lang == "en"
print(r2.extra)
- assert r2.extra['pubmed']['pub_types'] == ['Journal Article']
+ assert r2.extra["pubmed"]["pub_types"] == ["Journal Article"]
- assert r2.refs[0].extra['unstructured'] == "Microbiology. 2009 Jun;155(Pt 6):1840-6"
- assert r2.refs[0].extra['pmid'] == "19383690"
+ assert r2.refs[0].extra["unstructured"] == "Microbiology. 2009 Jun;155(Pt 6):1840-6"
+ assert r2.refs[0].extra["pmid"] == "19383690"
assert len(r2.refs) > 1
+
def test_pubmed_xml_dates(pubmed_importer):
- with open('tests/files/pubmed_31393839.xml', 'r') as f:
+ with open("tests/files/pubmed_31393839.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])
assert r1.release_year == 2019
+
def test_pubmed_xml_parse_refs(pubmed_importer):
"""
Tests the case of multiple nested ReferenceList/Reference objects, instead
of a single ReferenceList with multiple Reference
"""
- with open('tests/files/pubmed_19129924.xml', 'r') as f:
+ with open("tests/files/pubmed_19129924.xml", "r") as f:
soup = BeautifulSoup(f, "xml")
r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])
diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py
index 1caaf780..66661084 100644
--- a/python/tests/import_shadow.py
+++ b/python/tests/import_shadow.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -11,39 +10,42 @@ from fatcat_tools.importers import JsonLinePusher, ShadowLibraryImporter
def shadow_importer(api):
yield ShadowLibraryImporter(api)
+
# TODO: use API to check that entities actually created...
def test_shadow_importer_basic(shadow_importer):
- with open('tests/files/example_shadow.json', 'r') as f:
+ with open("tests/files/example_shadow.json", "r") as f:
JsonLinePusher(shadow_importer, f).run()
+
def test_shadow_importer(shadow_importer):
last_index = shadow_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_shadow.json', 'r') as f:
+ with open("tests/files/example_shadow.json", "r") as f:
shadow_importer.bezerk_mode = True
counts = JsonLinePusher(shadow_importer, f).run()
- assert counts['insert'] == 2
- assert counts['exists'] == 0
- assert counts['skip'] == 8
+ assert counts["insert"] == 2
+ assert counts["exists"] == 0
+ assert counts["skip"] == 8
# fetch most recent editgroup
- change = shadow_importer.api.get_changelog_entry(index=last_index+1)
+ change = shadow_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "shadow library" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.ShadowLibraryImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.ShadowLibraryImporter" in eg.extra["agent"]
# re-insert; should skip
- with open('tests/files/example_shadow.json', 'r') as f:
+ with open("tests/files/example_shadow.json", "r") as f:
shadow_importer.reset()
shadow_importer.bezerk_mode = False
counts = JsonLinePusher(shadow_importer, f).run()
- assert counts['insert'] == 0
- assert counts['exists'] == 2
- assert counts['skip'] == 8
+ assert counts["insert"] == 0
+ assert counts["exists"] == 2
+ assert counts["skip"] == 8
+
def test_shadow_dict_parse(shadow_importer):
- with open('tests/files/example_shadow.json', 'r') as f:
+ with open("tests/files/example_shadow.json", "r") as f:
raw = json.loads(f.readline())
f = shadow_importer.parse_record(raw)
@@ -55,7 +57,9 @@ def test_shadow_dict_parse(shadow_importer):
assert len(f.urls) == 2
for u in f.urls:
if u.rel == "publisher":
- assert u.url.startswith("https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf")
+ assert u.url.startswith(
+ "https://link.springer.com/content/pdf/10.1007%2Fs11626-008-9119-8.pdf"
+ )
if u.rel == "webarchive":
assert u.url.startswith("https://web.archive.org/")
assert "20180729135948" in u.url
diff --git a/python/tests/importer.py b/python/tests/importer.py
index 1f0e942f..76baa83d 100644
--- a/python/tests/importer.py
+++ b/python/tests/importer.py
@@ -1,22 +1,22 @@
-
from fixtures import *
from fatcat_tools.importers import CrossrefImporter, OrcidImporter
def test_issnl_mapping_lookup(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
fi = CrossrefImporter(api, issn_map_file=issn_file)
- assert fi.issn2issnl('0000-0027') == '0002-0027'
- assert fi.issn2issnl('0002-0027') == '0002-0027'
- assert fi.issn2issnl('9999-0027') is None
+ assert fi.issn2issnl("0000-0027") == "0002-0027"
+ assert fi.issn2issnl("0002-0027") == "0002-0027"
+ assert fi.issn2issnl("9999-0027") is None
+
+ assert fi.lookup_issnl("9999-9999") is None
- assert fi.lookup_issnl('9999-9999') is None
def test_identifiers(api):
- with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
+ with open("tests/files/ISSN-to-ISSN-L.snip.txt", "r") as issn_file:
ci = CrossrefImporter(api, issn_map_file=issn_file)
assert ci.is_issnl("1234-5678") is True
diff --git a/python/tests/subentity_state.py b/python/tests/subentity_state.py
index 2843176d..fda4b582 100644
--- a/python/tests/subentity_state.py
+++ b/python/tests/subentity_state.py
@@ -1,4 +1,3 @@
-
import json
from fatcat_openapi_client import *
@@ -19,6 +18,7 @@ Current set of such references:
=> work -> release
"""
+
def test_relation_states(api, app, mocker):
j1 = ContainerEntity(name="test journal")
@@ -31,14 +31,14 @@ def test_relation_states(api, app, mocker):
# WIP container
# these are basic ES stats for the container view pages
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
(200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)),
]
eg = quick_eg(api)
j2 = api.get_container(api.create_container(eg.editgroup_id, j2).ident)
- rv = app.get('/container/{}'.format(j2.ident))
+ rv = app.get("/container/{}".format(j2.ident))
assert rv.status_code == 200
# create inter-related entities
@@ -59,15 +59,15 @@ def test_relation_states(api, app, mocker):
assert r1.contribs[0].creator_id == c1.ident
assert r1.contribs[0].creator.display_name == "test person"
assert r1.contribs[0].creator.state == "active"
- rv = app.get('/release/{}'.format(r1.ident))
+ rv = app.get("/release/{}".format(r1.ident))
assert rv.status_code == 200
# delete creator
eg = quick_eg(api)
api.delete_creator(eg.editgroup_id, c1.ident)
api.accept_editgroup(eg.editgroup_id)
- rv = app.get('/creator/{}'.format(c1.ident))
- assert rv.status_code == 200 # TODO: HTTP status "Gone"?
+ rv = app.get("/creator/{}".format(c1.ident))
+ assert rv.status_code == 200 # TODO: HTTP status "Gone"?
c1_deleted = api.get_creator(c1.ident)
assert c1_deleted.state == "deleted"
@@ -79,7 +79,7 @@ def test_relation_states(api, app, mocker):
assert r1.contribs[0].creator_id == c1.ident
assert r1.contribs[0].creator.display_name is None
assert r1.contribs[0].creator.state == "deleted"
- rv = app.get('/release/{}'.format(r1.ident))
+ rv = app.get("/release/{}".format(r1.ident))
assert rv.status_code == 200
# wip container
@@ -96,14 +96,16 @@ def test_relation_states(api, app, mocker):
assert r1.contribs[0].creator_id == c1.ident
assert r1.contribs[0].creator.display_name is None
assert r1.contribs[0].creator.state == "deleted"
- rv = app.get('/release/{}'.format(r1.ident))
+ rv = app.get("/release/{}".format(r1.ident))
assert rv.status_code == 200
# redirect release
r2 = api.get_release(r2.ident, expand="container,creators,files")
assert r2.files == []
eg = quick_eg(api)
- api.update_release(eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds()))
+ api.update_release(
+ eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds())
+ )
f2.release_ids = [r2.ident]
f2 = api.get_file(api.create_file(eg.editgroup_id, f2).ident)
api.accept_editgroup(eg.editgroup_id)
@@ -116,9 +118,9 @@ def test_relation_states(api, app, mocker):
assert r2.contribs[0].creator_id == c1.ident
assert r2.contribs[0].creator.display_name is None
assert r2.contribs[0].creator.state == "deleted"
- rv = app.get('/release/{}'.format(r2.ident))
+ rv = app.get("/release/{}".format(r2.ident))
assert rv.status_code == 302
- rv = app.get('/file/{}'.format(f2.ident))
+ rv = app.get("/file/{}".format(f2.ident))
assert rv.status_code == 200
# delete release
@@ -130,9 +132,9 @@ def test_relation_states(api, app, mocker):
assert r2.container is None
assert r2.files is None
assert r2.contribs is None
- rv = app.get('/release/{}'.format(r2.ident))
- assert rv.status_code == 200 # TODO: HTTP Gone?
- rv = app.get('/file/{}'.format(f2.ident))
+ rv = app.get("/release/{}".format(r2.ident))
+ assert rv.status_code == 200 # TODO: HTTP Gone?
+ rv = app.get("/file/{}".format(f2.ident))
print(rv.data)
assert rv.status_code == 200
@@ -140,7 +142,7 @@ def test_relation_states(api, app, mocker):
def test_app_entity_states(api, app, mocker):
# these are basic ES stats for the container view pages
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
(200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)),
@@ -175,30 +177,32 @@ def test_app_entity_states(api, app, mocker):
api.update_container(eg.editgroup_id, j2.ident, ContainerEntity(redirect=j1.ident))
api.update_creator(eg.editgroup_id, c2.ident, CreatorEntity(redirect=c1.ident))
api.update_file(eg.editgroup_id, f2.ident, FileEntity(redirect=f1.ident))
- api.update_release(eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds()))
+ api.update_release(
+ eg.editgroup_id, r2.ident, ReleaseEntity(redirect=r1.ident, ext_ids=ReleaseExtIds())
+ )
api.update_work(eg.editgroup_id, r2.work_id, WorkEntity(redirect=r1.work_id))
api.accept_editgroup(eg.editgroup_id)
# all entities
- rv = app.get('/container/{}'.format(j1.ident))
+ rv = app.get("/container/{}".format(j1.ident))
assert rv.status_code == 200
- rv = app.get('/container/{}'.format(j2.ident))
+ rv = app.get("/container/{}".format(j2.ident))
assert rv.status_code == 302
- rv = app.get('/creator/{}'.format(c1.ident))
+ rv = app.get("/creator/{}".format(c1.ident))
assert rv.status_code == 200
- rv = app.get('/creator/{}'.format(c2.ident))
+ rv = app.get("/creator/{}".format(c2.ident))
assert rv.status_code == 302
- rv = app.get('/file/{}'.format(f1.ident))
+ rv = app.get("/file/{}".format(f1.ident))
assert rv.status_code == 200
- rv = app.get('/file/{}'.format(f2.ident))
+ rv = app.get("/file/{}".format(f2.ident))
assert rv.status_code == 302
- rv = app.get('/release/{}'.format(r1.ident))
+ rv = app.get("/release/{}".format(r1.ident))
assert rv.status_code == 200
- rv = app.get('/release/{}'.format(r2.ident))
+ rv = app.get("/release/{}".format(r2.ident))
assert rv.status_code == 302
- rv = app.get('/work/{}'.format(r1.work_id))
+ rv = app.get("/work/{}".format(r1.work_id))
assert rv.status_code == 200
- rv = app.get('/work/{}'.format(r2.work_id))
+ rv = app.get("/work/{}".format(r2.work_id))
assert rv.status_code == 302
# delete targets
@@ -211,23 +215,23 @@ def test_app_entity_states(api, app, mocker):
api.accept_editgroup(eg.editgroup_id)
# all entities
- rv = app.get('/container/{}'.format(j1.ident))
+ rv = app.get("/container/{}".format(j1.ident))
assert rv.status_code == 200
- rv = app.get('/container/{}'.format(j2.ident))
+ rv = app.get("/container/{}".format(j2.ident))
assert rv.status_code == 302
- rv = app.get('/creator/{}'.format(c1.ident))
+ rv = app.get("/creator/{}".format(c1.ident))
assert rv.status_code == 200
- rv = app.get('/creator/{}'.format(c2.ident))
+ rv = app.get("/creator/{}".format(c2.ident))
assert rv.status_code == 302
- rv = app.get('/file/{}'.format(f1.ident))
+ rv = app.get("/file/{}".format(f1.ident))
assert rv.status_code == 200
- rv = app.get('/file/{}'.format(f2.ident))
+ rv = app.get("/file/{}".format(f2.ident))
assert rv.status_code == 302
- rv = app.get('/release/{}'.format(r1.ident))
+ rv = app.get("/release/{}".format(r1.ident))
assert rv.status_code == 200
- rv = app.get('/release/{}'.format(r2.ident))
+ rv = app.get("/release/{}".format(r2.ident))
assert rv.status_code == 302
- rv = app.get('/work/{}'.format(r1.work_id))
+ rv = app.get("/work/{}".format(r1.work_id))
assert rv.status_code == 200
- rv = app.get('/work/{}'.format(r2.work_id))
+ rv = app.get("/work/{}".format(r2.work_id))
assert rv.status_code == 302
diff --git a/python/tests/tools_api.py b/python/tests/tools_api.py
index 6d514210..0de43f9b 100644
--- a/python/tests/tools_api.py
+++ b/python/tests/tools_api.py
@@ -1,4 +1,3 @@
-
import pytest
from fatcat_openapi_client.rest import ApiException
@@ -10,6 +9,7 @@ def test_authenticated_api():
api.get_changelog()
api.auth_check()
+
def test_public_api():
api = public_api("http://localhost:9411/v0")
api.get_changelog()
diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py
index 83266669..2bf584ba 100644
--- a/python/tests/transform_csl.py
+++ b/python/tests/transform_csl.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -10,15 +9,15 @@ from fatcat_tools.transforms import citeproc_csl, entity_from_json, release_to_c
def test_csl_crossref(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
csl = release_to_csl(r)
- citeproc_csl(csl, 'csl-json')
- citeproc_csl(csl, 'bibtex')
- citeproc_csl(csl, 'harvard1')
- citeproc_csl(csl, 'harvard1', html=True)
+ citeproc_csl(csl, "csl-json")
+ citeproc_csl(csl, "bibtex")
+ citeproc_csl(csl, "harvard1")
+ citeproc_csl(csl, "harvard1", html=True)
# check that with no author surnames, can't run
for c in r.contribs:
@@ -28,26 +27,30 @@ def test_csl_crossref(crossref_importer):
release_to_csl(r)
with pytest.raises(ValueError):
csl = release_to_csl(r)
- citeproc_csl(csl, 'csl-json')
+ citeproc_csl(csl, "csl-json")
+
def test_csl_pubmed(crossref_importer):
- with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f:
+ with open("tests/files/example_releases_pubmed19n0972.json", "r") as f:
# multiple single lines
for line in f:
r = entity_from_json(line, ReleaseEntity)
csl = release_to_csl(r)
- citeproc_csl(csl, 'csl-json')
- citeproc_csl(csl, 'bibtex')
- citeproc_csl(csl, 'harvard1')
- citeproc_csl(csl, 'harvard1', html=True)
+ citeproc_csl(csl, "csl-json")
+ citeproc_csl(csl, "bibtex")
+ citeproc_csl(csl, "harvard1")
+ citeproc_csl(csl, "harvard1", html=True)
+
def test_csl_pubmed_bibtex(crossref_importer):
- with open('tests/files/example_releases_pubmed19n0972.json', 'r') as f:
+ with open("tests/files/example_releases_pubmed19n0972.json", "r") as f:
r = entity_from_json(f.readline(), ReleaseEntity)
csl = release_to_csl(r)
- print(citeproc_csl(csl, 'bibtex'))
+ print(citeproc_csl(csl, "bibtex"))
# TODO: what's with the '`' in volume?
- assert citeproc_csl(csl, 'bibtex').strip() == """
+ assert (
+ citeproc_csl(csl, "bibtex").strip()
+ == """
@article{mędrela-kuder_szymura_2018,
title={Selected anti-health behaviours among women with osteoporosis},
volume={69`},
@@ -60,6 +63,10 @@ def test_csl_pubmed_bibtex(crossref_importer):
year={2018}
}
""".strip()
- assert citeproc_csl(csl, 'harvard1', html=True).strip() == """
+ )
+ assert (
+ citeproc_csl(csl, "harvard1", html=True).strip()
+ == """
Mędrela-Kuder and Szymura (2018) ‘Selected anti-health behaviours among women with osteoporosis’, <i>Roczniki Panstwowego Zakladu Higieny</i>, 69`(4). doi: 10.32394/rpzh.2018.0046.
""".strip()
+ )
diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py
index cee37867..082a4e99 100644
--- a/python/tests/transform_elasticsearch.py
+++ b/python/tests/transform_elasticsearch.py
@@ -24,13 +24,14 @@ from fatcat_tools.transforms import (
def test_basic_elasticsearch_convert(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
- r.state = 'active'
+ r.state = "active"
release_to_elasticsearch(r)
+
def test_rich_elasticsearch_convert():
r = ReleaseEntity(
title="something",
@@ -42,7 +43,7 @@ def test_rich_elasticsearch_convert():
ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
],
)
- r.state = 'active'
+ r.state = "active"
r.container = ContainerEntity(
name="dummy journal",
extra={
@@ -63,117 +64,132 @@ def test_rich_elasticsearch_convert():
"doaj": {"as_of": "2010-02-03"},
},
)
- r.files = [FileEntity(
- mimetype="application/pdf",
- urls=[
- FileUrl(rel="dweb", url="dat://a954329dlk/thingie"),
- FileUrl(rel="webarchive", url="https://web.archive.org/web/20001122030405/http://example.com"),
- FileUrl(rel="web", url="https://archive.org/details/blah/file.pdf"),
- ],
- extra={
- "shadows": {},
- },
- )]
+ r.files = [
+ FileEntity(
+ mimetype="application/pdf",
+ urls=[
+ FileUrl(rel="dweb", url="dat://a954329dlk/thingie"),
+ FileUrl(
+ rel="webarchive",
+ url="https://web.archive.org/web/20001122030405/http://example.com",
+ ),
+ FileUrl(rel="web", url="https://archive.org/details/blah/file.pdf"),
+ ],
+ extra={
+ "shadows": {},
+ },
+ )
+ ]
es = release_to_elasticsearch(r)
- assert es['release_year'] == r.release_year
- assert es['file_count'] == 1
- assert es['fileset_count'] == 0
- assert es['webcapture_count'] == 0
- assert es['ref_count'] == 2
- assert es['ref_linked_count'] == 1
-
- assert es['preservation'] == "bright"
- assert es['is_oa'] is True
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is True
- assert es['in_web'] is True
- assert es['in_dweb'] is True
- assert es['in_ia'] is True
- assert es['in_ia_sim'] is False
- assert es['in_kbart'] is True
- assert es['in_jstor'] is True
+ assert es["release_year"] == r.release_year
+ assert es["file_count"] == 1
+ assert es["fileset_count"] == 0
+ assert es["webcapture_count"] == 0
+ assert es["ref_count"] == 2
+ assert es["ref_linked_count"] == 1
+
+ assert es["preservation"] == "bright"
+ assert es["is_oa"] is True
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is True
+ assert es["in_web"] is True
+ assert es["in_dweb"] is True
+ assert es["in_ia"] is True
+ assert es["in_ia_sim"] is False
+ assert es["in_kbart"] is True
+ assert es["in_jstor"] is True
+
def test_elasticsearch_release_from_json():
- r = entity_from_json(open('./tests/files/release_etodop5banbndg3faecnfm6ozi.json', 'r').read(), ReleaseEntity)
+ r = entity_from_json(
+ open("./tests/files/release_etodop5banbndg3faecnfm6ozi.json", "r").read(), ReleaseEntity
+ )
es = release_to_elasticsearch(r)
- assert es['subtitle'] == "Correpondence"
- assert es['ident'] == "etodop5banbndg3faecnfm6ozi"
- assert es['container_name'] == "BJOG: an International Journal of Obstetrics and Gynaecology"
- assert es['first_page'] == "1404"
- assert es['issue'] == "11"
- assert es['volume'] == "118"
- assert es['number'] is None
-
- assert es['preservation'] == "dark"
- assert es['is_oa'] is False
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is True
- assert es['in_web'] is False
- assert es['in_dweb'] is False
- assert es['in_ia'] is False
- assert es['in_ia_sim'] is True
- assert es['in_kbart'] is True
- assert es['in_jstor'] is False
+ assert es["subtitle"] == "Correpondence"
+ assert es["ident"] == "etodop5banbndg3faecnfm6ozi"
+ assert (
+ es["container_name"] == "BJOG: an International Journal of Obstetrics and Gynaecology"
+ )
+ assert es["first_page"] == "1404"
+ assert es["issue"] == "11"
+ assert es["volume"] == "118"
+ assert es["number"] is None
+
+ assert es["preservation"] == "dark"
+ assert es["is_oa"] is False
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is True
+ assert es["in_web"] is False
+ assert es["in_dweb"] is False
+ assert es["in_ia"] is False
+ assert es["in_ia_sim"] is True
+ assert es["in_kbart"] is True
+ assert es["in_jstor"] is False
# this release has a fileset, and no file
- r = entity_from_json(open('./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json', 'r').read(), ReleaseEntity)
+ r = entity_from_json(
+ open("./tests/files/release_3mssw2qnlnblbk7oqyv2dafgey.json", "r").read(), ReleaseEntity
+ )
es = release_to_elasticsearch(r)
- assert es['title'] == "Jakobshavn Glacier Bed Elevation"
- assert es['ident'] == "3mssw2qnlnblbk7oqyv2dafgey"
- assert es['file_count'] == 0
- assert es['fileset_count'] == 1
- assert es['webcapture_count'] == 0
-
- assert es['preservation'] == "dark"
- assert es['is_oa'] is True
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is True
- assert es['in_web'] is True
- assert es['in_dweb'] is True
- assert es['in_ia'] is False
- assert es['in_ia_sim'] is False
- assert es['in_kbart'] is False
- assert es['in_jstor'] is False
+ assert es["title"] == "Jakobshavn Glacier Bed Elevation"
+ assert es["ident"] == "3mssw2qnlnblbk7oqyv2dafgey"
+ assert es["file_count"] == 0
+ assert es["fileset_count"] == 1
+ assert es["webcapture_count"] == 0
+
+ assert es["preservation"] == "dark"
+ assert es["is_oa"] is True
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is True
+ assert es["in_web"] is True
+ assert es["in_dweb"] is True
+ assert es["in_ia"] is False
+ assert es["in_ia_sim"] is False
+ assert es["in_kbart"] is False
+ assert es["in_jstor"] is False
# this release has a web capture, and no file (edited the JSON to remove file)
- r = entity_from_json(open('./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json', 'r').read(), ReleaseEntity)
+ r = entity_from_json(
+ open("./tests/files/release_mjtqtuyhwfdr7j2c3l36uor7uy.json", "r").read(), ReleaseEntity
+ )
es = release_to_elasticsearch(r)
- assert es['title'] == "Rethinking Personal Digital Archiving, Part 1"
- assert es['ident'] == "mjtqtuyhwfdr7j2c3l36uor7uy"
- assert es['file_count'] == 0
- assert es['fileset_count'] == 0
- assert es['webcapture_count'] == 1
-
- assert es['preservation'] == "bright"
- assert es['is_oa'] is True
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is True
- assert es['in_web'] is True
- assert es['in_dweb'] is False
- assert es['in_ia'] is True
- assert es['in_ia_sim'] is False
- assert es['in_kbart'] is False
- assert es['in_jstor'] is False
+ assert es["title"] == "Rethinking Personal Digital Archiving, Part 1"
+ assert es["ident"] == "mjtqtuyhwfdr7j2c3l36uor7uy"
+ assert es["file_count"] == 0
+ assert es["fileset_count"] == 0
+ assert es["webcapture_count"] == 1
+
+ assert es["preservation"] == "bright"
+ assert es["is_oa"] is True
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is True
+ assert es["in_web"] is True
+ assert es["in_dweb"] is False
+ assert es["in_ia"] is True
+ assert es["in_ia_sim"] is False
+ assert es["in_kbart"] is False
+ assert es["in_jstor"] is False
+
def test_elasticsearch_container_transform(journal_metadata_importer):
- with open('tests/files/journal_metadata.sample.json', 'r') as f:
+ with open("tests/files/journal_metadata.sample.json", "r") as f:
raw1 = json.loads(f.readline())
raw2 = json.loads(f.readline())
c1 = journal_metadata_importer.parse_record(raw1)
- c1.state = 'active'
+ c1.state = "active"
c2 = journal_metadata_importer.parse_record(raw2)
- c2.state = 'active'
+ c2.state = "active"
- c1.extra['publisher_type'] = "big5"
- c1.extra['discipline'] = "history"
+ c1.extra["publisher_type"] = "big5"
+ c1.extra["discipline"] = "history"
es = container_to_elasticsearch(c1)
- assert es['publisher'] == c1.publisher
- assert es['discipline'] == c1.extra['discipline']
- assert es['publisher_type'] == c1.extra['publisher_type']
- assert es['keepers'] == []
+ assert es["publisher"] == c1.publisher
+ assert es["discipline"] == c1.extra["discipline"]
+ assert es["publisher_type"] == c1.extra["publisher_type"]
+ assert es["keepers"] == []
stats = {
"ident": "en4qj5ijrbf5djxx7p5zzpjyoq",
@@ -186,71 +202,70 @@ def test_elasticsearch_container_transform(journal_metadata_importer):
"dark": 1635,
"none": 0,
"shadows_only": 0,
- "total": 11136
+ "total": 11136,
},
- "release_type": {
- "_unknown": 9,
- "article-journal": 11124,
- "editorial": 2,
- "letter": 1
- },
- "total": 11136
+ "release_type": {"_unknown": 9, "article-journal": 11124, "editorial": 2, "letter": 1},
+ "total": 11136,
}
es = container_to_elasticsearch(c2, stats=stats)
- assert es['name'] == c2.name
- assert es['publisher'] == c2.publisher
- assert es['keepers'] == list(c2.extra['kbart'].keys()) == ["portico"]
- assert es['any_kbart'] is True
+ assert es["name"] == c2.name
+ assert es["publisher"] == c2.publisher
+ assert es["keepers"] == list(c2.extra["kbart"].keys()) == ["portico"]
+ assert es["any_kbart"] is True
def test_elasticsearch_file_transform():
- with open('./tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json', 'r') as f:
+ with open("./tests/files/file_bcah4zp5tvdhjl5bqci2c2lgfa.json", "r") as f:
json_str = f.read()
fe = entity_from_json(json_str, FileEntity)
- fe.state = 'active'
+ fe.state = "active"
es = file_to_elasticsearch(fe)
# pylint infers type of 'fe' incorrectly for some reason (as str/bytes)
- assert es['sha1'] == fe.sha1 # pylint: disable=no-member
- assert es['sha256'] == fe.sha256 # pylint: disable=no-member
- assert es['md5'] == fe.md5 # pylint: disable=no-member
- assert es['size_bytes'] == fe.size # pylint: disable=no-member
- assert es['mimetype'] == fe.mimetype # pylint: disable=no-member
- assert es['in_ia'] is True
-
- assert 'web' in es['rels']
- assert 'www.zhros.ru' in es['hosts']
- assert 'zhros.ru' in es['domains']
- assert 'archive.org' in (es['hosts'] + es['domains'])
- assert 'web.archive.org' in (es['hosts'] + es['domains'])
+ assert es["sha1"] == fe.sha1 # pylint: disable=no-member
+ assert es["sha256"] == fe.sha256 # pylint: disable=no-member
+ assert es["md5"] == fe.md5 # pylint: disable=no-member
+ assert es["size_bytes"] == fe.size # pylint: disable=no-member
+ assert es["mimetype"] == fe.mimetype # pylint: disable=no-member
+ assert es["in_ia"] is True
+
+ assert "web" in es["rels"]
+ assert "www.zhros.ru" in es["hosts"]
+ assert "zhros.ru" in es["domains"]
+ assert "archive.org" in (es["hosts"] + es["domains"])
+ assert "web.archive.org" in (es["hosts"] + es["domains"])
# old regression
- assert '.archive.org' not in (es['hosts'] + es['domains'])
+ assert ".archive.org" not in (es["hosts"] + es["domains"])
+
def test_elasticsearch_changelog_transform():
- ce = entity_from_json(open('./tests/files/changelog_3469683.json', 'r').read(), ChangelogEntry)
+ ce = entity_from_json(
+ open("./tests/files/changelog_3469683.json", "r").read(), ChangelogEntry
+ )
es = changelog_to_elasticsearch(ce)
- assert es['index'] == 3469683
+ assert es["index"] == 3469683
# len("2020-01-30T05:04:39") => 19
- assert es['timestamp'][:19] == "2020-01-30T05:04:39.738601Z"[:19]
- assert es['editor_id'] == "scmbogxw25evtcesfcab5qaboa"
- assert es['username'] == "crawl-bot"
- assert es['is_bot'] is True
- assert es['is_admin'] is True
- assert es['agent'] == "fatcat_tools.IngestFileResultImporter"
-
- assert es['total'] == 50
- assert es['files'] == 50
- assert es['new_files'] == 50
- assert es['created'] == 50
-
- assert es['releases'] == 0
- assert es['new_releases'] == 0
- assert es['updated'] == 0
- assert es['deleted'] == 0
+ assert es["timestamp"][:19] == "2020-01-30T05:04:39.738601Z"[:19]
+ assert es["editor_id"] == "scmbogxw25evtcesfcab5qaboa"
+ assert es["username"] == "crawl-bot"
+ assert es["is_bot"] is True
+ assert es["is_admin"] is True
+ assert es["agent"] == "fatcat_tools.IngestFileResultImporter"
+
+ assert es["total"] == 50
+ assert es["files"] == 50
+ assert es["new_files"] == 50
+ assert es["created"] == 50
+
+ assert es["releases"] == 0
+ assert es["new_releases"] == 0
+ assert es["updated"] == 0
+ assert es["deleted"] == 0
+
def test_elasticsearch_release_kbart_year():
this_year = datetime.date.today().year
@@ -264,7 +279,7 @@ def test_elasticsearch_release_kbart_year():
ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
],
)
- r.state = 'active'
+ r.state = "active"
r.container = ContainerEntity(
name="dummy journal",
extra={
@@ -276,18 +291,18 @@ def test_elasticsearch_release_kbart_year():
},
)
es = release_to_elasticsearch(r)
- assert es['release_year'] == this_year
-
- assert es['preservation'] == "none"
- assert es['is_oa'] is True
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is False
- assert es['in_web'] is False
- assert es['in_dweb'] is False
- assert es['in_ia'] is False
- assert es['in_ia_sim'] is False
- assert es['in_kbart'] is False
- assert es['in_jstor'] is False
+ assert es["release_year"] == this_year
+
+ assert es["preservation"] == "none"
+ assert es["is_oa"] is True
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is False
+ assert es["in_web"] is False
+ assert es["in_dweb"] is False
+ assert es["in_ia"] is False
+ assert es["in_ia_sim"] is False
+ assert es["in_kbart"] is False
+ assert es["in_jstor"] is False
r.container = ContainerEntity(
name="dummy journal",
@@ -300,15 +315,15 @@ def test_elasticsearch_release_kbart_year():
},
)
es = release_to_elasticsearch(r)
- assert es['release_year'] == this_year
-
- assert es['preservation'] == "dark"
- assert es['is_oa'] is True
- assert es['is_longtail_oa'] is False
- assert es['is_preserved'] is True
- assert es['in_web'] is False
- assert es['in_dweb'] is False
- assert es['in_ia'] is False
- assert es['in_ia_sim'] is False
- assert es['in_kbart'] is True
- assert es['in_jstor'] is False
+ assert es["release_year"] == this_year
+
+ assert es["preservation"] == "dark"
+ assert es["is_oa"] is True
+ assert es["is_longtail_oa"] is False
+ assert es["is_preserved"] is True
+ assert es["in_web"] is False
+ assert es["in_dweb"] is False
+ assert es["in_ia"] is False
+ assert es["in_ia_sim"] is False
+ assert es["in_kbart"] is True
+ assert es["in_jstor"] is False
diff --git a/python/tests/transform_ingest.py b/python/tests/transform_ingest.py
index 6a358c8e..4949c109 100644
--- a/python/tests/transform_ingest.py
+++ b/python/tests/transform_ingest.py
@@ -1,4 +1,3 @@
-
import json
from fatcat_openapi_client import *
@@ -9,14 +8,15 @@ from fatcat_tools.transforms import release_ingest_request
def test_basic_ingest_release(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
- r.state = 'active'
+ r.state = "active"
req = release_ingest_request(r)
assert req is not None
+
def test_rich_ingest_release():
r = ReleaseEntity(
title="something",
@@ -29,7 +29,7 @@ def test_rich_ingest_release():
ReleaseRef(target_release_id="iznnn644szdwva7khyxqzc73bi"),
],
)
- r.state = 'active'
+ r.state = "active"
r.container = ContainerEntity(
name="dummy journal",
extra={
@@ -52,12 +52,12 @@ def test_rich_ingest_release():
)
ir = release_ingest_request(r)
assert ir is not None
- assert ir['base_url'] == 'https://doi.org/10.123/456'
- assert ir['ext_ids']['doi'] == '10.123/456'
- assert ir['ext_ids'].get('pmcid') is None
- assert ir['ingest_type'] == 'pdf'
+ assert ir["base_url"] == "https://doi.org/10.123/456"
+ assert ir["ext_ids"]["doi"] == "10.123/456"
+ assert ir["ext_ids"].get("pmcid") is None
+ assert ir["ingest_type"] == "pdf"
# check ingest type ("d-lib")
r.container_id = "ugbiirfvufgcjkx33r3cmemcuu"
ir = release_ingest_request(r)
- assert ir['ingest_type'] == 'html'
+ assert ir["ingest_type"] == "html"
diff --git a/python/tests/transform_toml.py b/python/tests/transform_toml.py
index 29da1d38..56bf32a1 100644
--- a/python/tests/transform_toml.py
+++ b/python/tests/transform_toml.py
@@ -1,4 +1,3 @@
-
import json
from fatcat_openapi_client import ReleaseEntity
@@ -9,15 +8,15 @@ from fatcat_tools.transforms import entity_from_toml, entity_to_toml
def test_basic_toml(crossref_importer):
- with open('tests/files/crossref-works.single.json', 'r') as f:
+ with open("tests/files/crossref-works.single.json", "r") as f:
# not a single line
raw = json.loads(f.read())
r = crossref_importer.parse_record(raw)
- r.state = 'active'
+ r.state = "active"
toml_str = entity_to_toml(r)
r2 = entity_from_toml(toml_str, ReleaseEntity)
assert r == r2
- toml_str = entity_to_toml(r, pop_fields=['ident', 'revision', 'blah', 'extra'])
+ toml_str = entity_to_toml(r, pop_fields=["ident", "revision", "blah", "extra"])
r3 = entity_from_toml(toml_str, ReleaseEntity)
assert r != r3
diff --git a/python/tests/web_auth.py b/python/tests/web_auth.py
index 643d806e..1238275e 100644
--- a/python/tests/web_auth.py
+++ b/python/tests/web_auth.py
@@ -1,4 +1,3 @@
-
import responses
from fixtures import *
@@ -9,53 +8,74 @@ def test_ia_xauth_fail(full_app):
# failed login
with full_app.test_client() as app:
- rv = app.get('/auth/ia/login')
+ rv = app.get("/auth/ia/login")
assert rv.status_code == 200
- responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=authenticate",
- status=401, json=dict(success=False))
- rv = app.post('/auth/ia/login', follow_redirects=True,
- data=dict(email="abcd@example.com", password="god"))
+ responses.add(
+ responses.POST,
+ full_app.config["IA_XAUTH_URI"] + "?op=authenticate",
+ status=401,
+ json=dict(success=False),
+ )
+ rv = app.post(
+ "/auth/ia/login",
+ follow_redirects=True,
+ data=dict(email="abcd@example.com", password="god"),
+ )
assert rv.status_code == 401
- rv = app.get('/auth/account', follow_redirects=False)
+ rv = app.get("/auth/account", follow_redirects=False)
assert rv.status_code == 302
+
@responses.activate
def test_ia_xauth(full_app):
# successful login
with full_app.test_client() as app:
- rv = app.get('/auth/token_login')
+ rv = app.get("/auth/token_login")
assert rv.status_code == 200
- responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=authenticate",
- status=200, json={'success': True})
- responses.add(responses.POST, full_app.config['IA_XAUTH_URI'] + "?op=info",
- status=200, json={
- 'success': True,
- 'values': {'screenname': "user123",
- 'itemname': "user_item123"}})
- rv = app.post('/auth/ia/login', follow_redirects=True,
- data=dict(email="abcd@example.com", password="god"))
+ responses.add(
+ responses.POST,
+ full_app.config["IA_XAUTH_URI"] + "?op=authenticate",
+ status=200,
+ json={"success": True},
+ )
+ responses.add(
+ responses.POST,
+ full_app.config["IA_XAUTH_URI"] + "?op=info",
+ status=200,
+ json={
+ "success": True,
+ "values": {"screenname": "user123", "itemname": "user_item123"},
+ },
+ )
+ rv = app.post(
+ "/auth/ia/login",
+ follow_redirects=True,
+ data=dict(email="abcd@example.com", password="god"),
+ )
assert rv.status_code == 200
- rv = app.get('/auth/account', follow_redirects=False)
+ rv = app.get("/auth/account", follow_redirects=False)
assert rv.status_code == 200
+
def test_basic_auth_views(app):
- rv = app.get('/auth/login')
+ rv = app.get("/auth/login")
assert rv.status_code == 200
- rv = app.get('/auth/logout')
+ rv = app.get("/auth/logout")
assert rv.status_code == 200
+
def test_auth_token(app_admin):
- rv = app_admin.get('/auth/account', follow_redirects=False)
+ rv = app_admin.get("/auth/account", follow_redirects=False)
assert rv.status_code == 200
- rv = app_admin.post('/auth/create_token', follow_redirects=False)
+ rv = app_admin.post("/auth/create_token", follow_redirects=False)
assert rv.status_code == 200
diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py
index 508600e7..e3dbec2b 100644
--- a/python/tests/web_citation_csl.py
+++ b/python/tests/web_citation_csl.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -8,33 +7,39 @@ from fixtures import *
def test_release_bibtex(app, api):
# "realistic" demo entity
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam")
assert rv.status_code == 200
- assert b'BibTeX' in rv.data
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam.bib')
+ assert b"BibTeX" in rv.data
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam.bib")
assert rv.status_code == 200
- assert b'@article{' in rv.data
- rv = app.get('/release/ccccccccccccccccccccccccca.bib')
+ assert b"@article{" in rv.data
+ rv = app.get("/release/ccccccccccccccccccccccccca.bib")
assert rv.status_code == 404
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=bibtex")
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=csl-json")
assert rv.status_code == 200
# could also rv.get_json() here
- json.loads(rv.data.decode('utf-8'))
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=modern-language-association')
+ json.loads(rv.data.decode("utf-8"))
+ rv = app.get(
+ "/release/aaaaaaaaaaaaarceaaaaaaaaam/citeproc?style=modern-language-association"
+ )
assert rv.status_code == 200
- assert rv.data.decode('utf-8').startswith('Ioannidis, J.. Why Most Published Research Findings Are False')
+ assert rv.data.decode("utf-8").startswith(
+ "Ioannidis, J.. Why Most Published Research Findings Are False"
+ )
# "dummy" demo entity; very minimal metadata
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai")
assert rv.status_code == 200
- assert b'BibTeX' in rv.data
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai.bib')
+ assert b"BibTeX" in rv.data
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai.bib")
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association')
+ rv = app.get(
+ "/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=modern-language-association"
+ )
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/citeproc?style=csl-json")
assert rv.status_code == 200
# create release which can not have citeproc run on it (no authors)
@@ -46,28 +51,26 @@ def test_release_bibtex(app, api):
r1edit = api.create_release(eg.editgroup_id, r1)
api.accept_editgroup(eg.editgroup_id)
- rv = app.get('/release/{}'.format(r1edit.ident))
+ rv = app.get("/release/{}".format(r1edit.ident))
assert rv.status_code == 200
- assert b'BibTeX' not in rv.data
+ assert b"BibTeX" not in rv.data
with pytest.raises(ValueError):
- rv = app.get('/release/{}.bib'.format(r1edit.ident))
+ rv = app.get("/release/{}.bib".format(r1edit.ident))
# create release can have citeproc run on it (no authors)
eg = quick_eg(api)
r2 = ReleaseEntity(
title="some title again",
contribs=[
- ReleaseContrib(
- given_name="Paul",
- surname="Otlet"),
+ ReleaseContrib(given_name="Paul", surname="Otlet"),
],
ext_ids=ReleaseExtIds(),
)
r2edit = api.create_release(eg.editgroup_id, r2)
api.accept_editgroup(eg.editgroup_id)
- rv = app.get('/release/{}'.format(r2edit.ident))
+ rv = app.get("/release/{}".format(r2edit.ident))
assert rv.status_code == 200
- assert b'BibTeX' in rv.data
- rv = app.get('/release/{}.bib'.format(r2edit.ident))
+ assert b"BibTeX" in rv.data
+ rv = app.get("/release/{}.bib".format(r2edit.ident))
assert rv.status_code == 200
diff --git a/python/tests/web_coverage.py b/python/tests/web_coverage.py
index 0dbf3df8..7e84f11e 100644
--- a/python/tests/web_coverage.py
+++ b/python/tests/web_coverage.py
@@ -1,4 +1,3 @@
-
import datetime
import json
@@ -11,56 +10,59 @@ def test_container_coverage(app, mocker):
# preservation by type histogram
elastic_resp1 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'type_preservation': {
- 'buckets': [
- {'key': {'release_type': 'article-journal', 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'release_type': 'book', 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "type_preservation": {
+ "buckets": [
+ {
+ "key": {"release_type": "article-journal", "preservation": "bright"},
+ "doc_count": 444,
+ },
+ {"key": {"release_type": "book", "preservation": "dark"}, "doc_count": 111},
+ ],
+ "sum_other_doc_count": 0,
},
},
}
# preservation by year histogram
elastic_resp2 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'year_preservation': {
- 'buckets': [
- {'key': {'year': 2004.0, 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'year': 2005.0, 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "year_preservation": {
+ "buckets": [
+ {"key": {"year": 2004.0, "preservation": "bright"}, "doc_count": 444},
+ {"key": {"year": 2005.0, "preservation": "dark"}, "doc_count": 111},
+ ],
+ "sum_other_doc_count": 0,
},
},
}
# preservation by volume histogram
elastic_resp3 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'volume_preservation': {
- 'buckets': [
- {'key': {'volume': "12", 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'volume': "12", 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "volume_preservation": {
+ "buckets": [
+ {"key": {"volume": "12", "preservation": "bright"}, "doc_count": 444},
+ {"key": {"volume": "12", "preservation": "dark"}, "doc_count": 111},
+ ],
+ "sum_other_doc_count": 0,
},
},
}
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
# status
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
@@ -68,23 +70,23 @@ def test_container_coverage(app, mocker):
(200, {}, json.dumps(elastic_resp1)),
]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/coverage')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/coverage")
assert rv.status_code == 200
es_raw.side_effect = [(200, {}, json.dumps(elastic_resp2))]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.svg')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.svg")
assert rv.status_code == 200
es_raw.side_effect = [(200, {}, json.dumps(elastic_resp2))]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.json')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_year.json")
assert rv.status_code == 200
es_raw.side_effect = [(200, {}, json.dumps(elastic_resp3))]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.svg')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.svg")
assert rv.status_code == 200
es_raw.side_effect = [(200, {}, json.dumps(elastic_resp3))]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.json')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/preservation_by_volume.json")
assert rv.status_code == 200
@@ -92,34 +94,37 @@ def test_coverage_search(app, mocker):
# preservation by year histogram
elastic_resp1 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'year_preservation': {
- 'buckets': [
- {'key': {'year': 2004.0, 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'year': 2005.0, 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "year_preservation": {
+ "buckets": [
+ {"key": {"year": 2004.0, "preservation": "bright"}, "doc_count": 444},
+ {"key": {"year": 2005.0, "preservation": "dark"}, "doc_count": 111},
+ ],
+ "sum_other_doc_count": 0,
},
},
}
# preservation by type histogram
elastic_resp2 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'type_preservation': {
- 'buckets': [
- {'key': {'release_type': 'article-journal', 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'release_type': 'book', 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "type_preservation": {
+ "buckets": [
+ {
+ "key": {"release_type": "article-journal", "preservation": "bright"},
+ "doc_count": 444,
+ },
+ {"key": {"release_type": "book", "preservation": "dark"}, "doc_count": 111},
+ ],
+ "sum_other_doc_count": 0,
},
},
}
@@ -127,22 +132,28 @@ def test_coverage_search(app, mocker):
# preservation by date histogram
today = str(datetime.date.today())
elastic_resp3 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'date_preservation': {
- 'buckets': [
- {'key': {'date': f'{today}T00:00.000Z', 'preservation': 'bright'}, 'doc_count': 444},
- {'key': {'date': f'{today}T00:00.000Z', 'preservation': 'dark'}, 'doc_count': 111},
- ],
- 'sum_other_doc_count': 0,
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "date_preservation": {
+ "buckets": [
+ {
+ "key": {"date": f"{today}T00:00.000Z", "preservation": "bright"},
+ "doc_count": 444,
+ },
+ {
+ "key": {"date": f"{today}T00:00.000Z", "preservation": "dark"},
+ "doc_count": 111,
+ },
+ ],
+ "sum_other_doc_count": 0,
},
},
}
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
# counts summary
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
@@ -152,7 +163,7 @@ def test_coverage_search(app, mocker):
(200, {}, json.dumps(elastic_resp1)),
]
- rv = app.get('/coverage/search?q=*')
+ rv = app.get("/coverage/search?q=*")
assert rv.status_code == 200
es_raw.side_effect = [
@@ -164,7 +175,7 @@ def test_coverage_search(app, mocker):
(200, {}, json.dumps(elastic_resp3)),
]
- rv = app.get('/coverage/search?recent=1&q=*')
+ rv = app.get("/coverage/search?recent=1&q=*")
assert rv.status_code == 200
@@ -172,61 +183,62 @@ def test_legacy_container_coverage(app, mocker):
# legacy preservation by year
elastic_resp1 = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {
- 'year_in_ia': {
- 'after_key': {'year': 2020.0, 'in_ia': True},
- 'buckets': [
- {'key': {'year': 2004.0, 'in_ia': False}, 'doc_count': 4},
- {'key': {'year': 2004.0, 'in_ia': True}, 'doc_count': 68},
- {'key': {'year': 2005.0, 'in_ia': False}, 'doc_count': 26},
- {'key': {'year': 2005.0, 'in_ia': True}, 'doc_count': 428},
- {'key': {'year': 2006.0, 'in_ia': False}, 'doc_count': 14},
- {'key': {'year': 2006.0, 'in_ia': True}, 'doc_count': 487},
- {'key': {'year': 2007.0, 'in_ia': False}, 'doc_count': 13},
- {'key': {'year': 2007.0, 'in_ia': True}, 'doc_count': 345},
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "year_in_ia": {
+ "after_key": {"year": 2020.0, "in_ia": True},
+ "buckets": [
+ {"key": {"year": 2004.0, "in_ia": False}, "doc_count": 4},
+ {"key": {"year": 2004.0, "in_ia": True}, "doc_count": 68},
+ {"key": {"year": 2005.0, "in_ia": False}, "doc_count": 26},
+ {"key": {"year": 2005.0, "in_ia": True}, "doc_count": 428},
+ {"key": {"year": 2006.0, "in_ia": False}, "doc_count": 14},
+ {"key": {"year": 2006.0, "in_ia": True}, "doc_count": 487},
+ {"key": {"year": 2007.0, "in_ia": False}, "doc_count": 13},
+ {"key": {"year": 2007.0, "in_ia": True}, "doc_count": 345},
],
},
},
}
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp1)),
]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.json')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.json")
assert rv.status_code == 200
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp1)),
]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg")
assert rv.status_code == 200
def test_coverage_empty_years(app, mocker):
elastic_resp = {
- 'took': 294,
- 'timed_out': False,
- '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0},
- 'hits': {'total': 4327, 'max_score': 0.0, 'hits': []},
- 'aggregations': {'year_in_ia': {
- 'after_key': {'year': 2020.0, 'in_ia': True},
- 'buckets': [
- ],
- }},
+ "took": 294,
+ "timed_out": False,
+ "_shards": {"total": 5, "successful": 5, "skipped": 0, "failed": 0},
+ "hits": {"total": 4327, "max_score": 0.0, "hits": []},
+ "aggregations": {
+ "year_in_ia": {
+ "after_key": {"year": 2020.0, "in_ia": True},
+ "buckets": [],
+ }
+ },
}
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp)),
]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/ia_coverage_years.svg")
assert rv.status_code == 200
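
None of these coverage views talk to a live Elasticsearch cluster: every test patches the low-level urllib3 transport and queues canned aggregation payloads, one tuple per query the page issues. A minimal sketch of that pattern, assuming pytest-mock and a pre-8.x elasticsearch-py client (the elasticsearch.connection.Urllib3HttpConnection path patched above only exists there); the index name is an arbitrary example:

import json

import elasticsearch


def test_perform_request_mock_sketch(mocker):
    canned = {
        "took": 1,
        "timed_out": False,
        "hits": {"total": 0, "max_score": 0.0, "hits": []},
    }
    # patch the transport method; each queued (status, headers, body) tuple
    # answers exactly one HTTP call, in order
    es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
    es_raw.side_effect = [
        (200, {}, json.dumps(canned)),
    ]
    client = elasticsearch.Elasticsearch("http://localhost:9200")  # never actually contacted
    resp = client.search(index="fatcat_release", body={"query": {"match_all": {}}})
    assert resp["hits"]["total"] == 0
    assert es_raw.call_count == 1
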
diff --git a/python/tests/web_editgroup.py b/python/tests/web_editgroup.py
index 20dc8d93..62a5df2e 100644
--- a/python/tests/web_editgroup.py
+++ b/python/tests/web_editgroup.py
@@ -1,59 +1,60 @@
-
from fixtures import *
def test_editgroup_basics(app):
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaae')
+ rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaae")
assert rv.status_code == 200
- rv = app.get('/editgroup/ccccccccccccccccccccccccca')
+ rv = app.get("/editgroup/ccccccccccccccccccccccccca")
assert rv.status_code == 404
- rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaae')
+ rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaae")
assert rv.status_code == 200
- rv = app.get('/editor/ccccccccccccccccccccccccca')
+ rv = app.get("/editor/ccccccccccccccccccccccccca")
assert rv.status_code == 404
- rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaae/editgroups')
+ rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaae/editgroups")
assert rv.status_code == 200
- rv = app.get('/editor/ccccccccccccccccccccccccca/editgroups')
+ rv = app.get("/editor/ccccccccccccccccccccccccca/editgroups")
assert rv.status_code == 404
- rv = app.get('/changelog')
+ rv = app.get("/changelog")
assert rv.status_code == 200
- rv = app.get('/changelog/1')
+ rv = app.get("/changelog/1")
assert rv.status_code == 200
- rv = app.get('/changelog/1.3')
+ rv = app.get("/changelog/1.3")
assert rv.status_code == 404
- rv = app.get('/changelog/9999999')
+ rv = app.get("/changelog/9999999")
assert rv.status_code == 404
- rv = app.get('/reviewable')
+ rv = app.get("/reviewable")
assert rv.status_code == 200
+
def test_editgroup_annotations(app):
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4')
+ rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4")
assert rv.status_code == 200
- assert b'updated with changes, please re-review' in rv.data
- assert b'concerns about this edit...' in rv.data
- assert b'Admin' in rv.data
- assert b'demo-user' in rv.data
- assert b'claire' in rv.data
- assert b'Signup' in rv.data
- assert b'Add Comment' not in rv.data
+ assert b"updated with changes, please re-review" in rv.data
+ assert b"concerns about this edit..." in rv.data
+ assert b"Admin" in rv.data
+ assert b"demo-user" in rv.data
+ assert b"claire" in rv.data
+ assert b"Signup" in rv.data
+ assert b"Add Comment" not in rv.data
- rv = app.get('/editor/aaaaaaaaaaaabkvkaaaaaaaaaq/annotations')
+ rv = app.get("/editor/aaaaaaaaaaaabkvkaaaaaaaaaq/annotations")
assert rv.status_code == 200
- assert b'updated with changes, please re-review' not in rv.data
- assert b'concerns about this edit...' in rv.data
- assert b'Admin' not in rv.data
- assert b'claire' in rv.data
- assert b'aaaaaaaaaaaabo53aaaaaaaaa4' in rv.data
+ assert b"updated with changes, please re-review" not in rv.data
+ assert b"concerns about this edit..." in rv.data
+ assert b"Admin" not in rv.data
+ assert b"claire" in rv.data
+ assert b"aaaaaaaaaaaabo53aaaaaaaaa4" in rv.data
+
def test_editgroup_annotations_login(app_admin):
# if logged in, should see form
- rv = app_admin.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4')
+ rv = app_admin.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaa4")
assert rv.status_code == 200
- assert b'Signup' not in rv.data
- assert b'Add Comment' in rv.data
+ assert b"Signup" not in rv.data
+ assert b"Add Comment" in rv.data
diff --git a/python/tests/web_editing.py b/python/tests/web_editing.py
index fb8b3f93..cbde095d 100644
--- a/python/tests/web_editing.py
+++ b/python/tests/web_editing.py
@@ -1,4 +1,3 @@
-
from fixtures import *
@@ -6,87 +5,97 @@ def test_web_release_create_accept(app_admin, api):
eg = quick_eg(api)
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- assert b'Release Edits (0)' in rv.data
+ assert b"Release Edits (0)" in rv.data
# bogus/bad submit
- rv = app_admin.post('/release/create',
+ rv = app_admin.post(
+ "/release/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'release_type': 'badmojo',
- 'release_stage': 'published',
- 'title': 'something bogus',
+ "editgroup_id": eg.editgroup_id,
+ "release_type": "badmojo",
+ "release_stage": "published",
+ "title": "something bogus",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
# Does not insert bad choices into drop-down
- #assert b'badmojo' in rv.data
- assert b'Not a valid choice' in rv.data
+ # assert b'badmojo' in rv.data
+ assert b"Not a valid choice" in rv.data
# bad wikidata QID
- rv = app_admin.post('/release/create',
+ rv = app_admin.post(
+ "/release/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'release_type': 'article-journal',
- 'release_stage': 'published',
- 'title': 'something bogus',
- 'wikidata_qid': '884',
+ "editgroup_id": eg.editgroup_id,
+ "release_type": "article-journal",
+ "release_stage": "published",
+ "title": "something bogus",
+ "wikidata_qid": "884",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
# ok/valid submit
- rv = app_admin.post('/release/create',
+ rv = app_admin.post(
+ "/release/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'release_type': 'article-journal',
- 'release_stage': 'published',
- 'title': 'something bogus',
- 'doi': '10.1234/999999',
+ "editgroup_id": eg.editgroup_id,
+ "release_type": "article-journal",
+ "release_stage": "published",
+ "title": "something bogus",
+ "doi": "10.1234/999999",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
- assert b'10.1234/999999' in rv.data
+ assert b"10.1234/999999" in rv.data
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- assert b'Release Edits (1)' in rv.data
+ assert b"Release Edits (1)" in rv.data
- rv = app_admin.post('/editgroup/{}/submit'.format(eg.editgroup_id),
+ rv = app_admin.post(
+ "/editgroup/{}/submit".format(eg.editgroup_id),
follow_redirects=True,
)
assert rv.status_code == 200
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- assert b'Submitted' in rv.data
- assert b'None!' in rv.data
+ assert b"Submitted" in rv.data
+ assert b"None!" in rv.data
- rv = app_admin.post('/editgroup/{}/unsubmit'.format(eg.editgroup_id),
+ rv = app_admin.post(
+ "/editgroup/{}/unsubmit".format(eg.editgroup_id),
follow_redirects=True,
)
assert rv.status_code == 200
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- rv = app_admin.post('/editgroup/{}/annotation'.format(eg.editgroup_id),
+ rv = app_admin.post(
+ "/editgroup/{}/annotation".format(eg.editgroup_id),
data={
- 'comment_markdown': "This is an **example** of markdown in a test annotation",
+ "comment_markdown": "This is an **example** of markdown in a test annotation",
},
follow_redirects=True,
)
assert rv.status_code == 200
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- assert b'<p>This is an <strong>example</strong> of markdown in a test annotation' in rv.data
+ assert b"<p>This is an <strong>example</strong> of markdown in a test annotation" in rv.data
- rv = app_admin.post('/editgroup/{}/accept'.format(eg.editgroup_id),
+ rv = app_admin.post(
+ "/editgroup/{}/accept".format(eg.editgroup_id),
follow_redirects=True,
)
assert rv.status_code == 200
- rv = app_admin.get('/editgroup/{}'.format(eg.editgroup_id))
+ rv = app_admin.get("/editgroup/{}".format(eg.editgroup_id))
assert rv.status_code == 200
- assert b'Merged' in rv.data
+ assert b"Merged" in rv.data
def test_web_container_create(app_admin, api):
@@ -94,20 +103,24 @@ def test_web_container_create(app_admin, api):
eg = quick_eg(api)
# bogus/bad submit
- rv = app_admin.post('/container/create',
+ rv = app_admin.post(
+ "/container/create",
data={
- 'editgroup_id': eg.editgroup_id,
+ "editgroup_id": eg.editgroup_id,
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
# ok/valid submit
- rv = app_admin.post('/container/create',
+ rv = app_admin.post(
+ "/container/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'name': 'blah blah journal blah',
+ "editgroup_id": eg.editgroup_id,
+ "name": "blah blah journal blah",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
@@ -116,144 +129,167 @@ def test_web_file_create(app_admin, api):
eg = quick_eg(api)
# bogus/bad submit
- rv = app_admin.post('/file/create',
+ rv = app_admin.post(
+ "/file/create",
data={
- 'editgroup_id': eg.editgroup_id,
+ "editgroup_id": eg.editgroup_id,
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
# ok/valid submit
- rv = app_admin.post('/file/create',
+ rv = app_admin.post(
+ "/file/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'size': '12345',
- 'sha1': '45be56a396c4d03faaa41e055170c23534dec736',
+ "editgroup_id": eg.editgroup_id,
+ "size": "12345",
+ "sha1": "45be56a396c4d03faaa41e055170c23534dec736",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
# upper-case SHA-1
- rv = app_admin.post('/file/create',
+ rv = app_admin.post(
+ "/file/create",
data={
- 'editgroup_id': eg.editgroup_id,
- 'size': '12345',
- 'sha1': '45BE56A396C4D03FAAA41E055170C23534DEC736',
+ "editgroup_id": eg.editgroup_id,
+ "size": "12345",
+ "sha1": "45BE56A396C4D03FAAA41E055170C23534DEC736",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
+
def test_web_file_toml_create(app_admin, api):
eg = quick_eg(api)
# bogus/bad submit
- rv = app_admin.post('/file/create/toml',
+ rv = app_admin.post(
+ "/file/create/toml",
data={
- 'editgroup_id': eg.editgroup_id,
+ "editgroup_id": eg.editgroup_id,
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
# ok/valid submit
- rv = app_admin.post('/file/create/toml',
+ rv = app_admin.post(
+ "/file/create/toml",
data={
- 'editgroup_id': eg.editgroup_id,
- 'toml': """
+ "editgroup_id": eg.editgroup_id,
+ "toml": """
size = 12345
sha1 = "45be56a396c4d03faaa41e055170c23534dec736"
""",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
# upper-case SHA-1
- rv = app_admin.post('/file/create/toml',
+ rv = app_admin.post(
+ "/file/create/toml",
data={
- 'editgroup_id': eg.editgroup_id,
- 'toml': """
+ "editgroup_id": eg.editgroup_id,
+ "toml": """
size = 12345
sha1 = "45BE56A396C4D03FAAA41E055170C23534DEC736"
""",
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 400
+
def test_web_file_delete(app_admin, api):
eg = quick_eg(api)
- rv = app_admin.get('/file/aaaaaaaaaaaaamztaaaaaaaaam/delete')
+ rv = app_admin.get("/file/aaaaaaaaaaaaamztaaaaaaaaam/delete")
assert rv.status_code == 200
- rv = app_admin.post('/file/aaaaaaaaaaaaamztaaaaaaaaam/delete',
+ rv = app_admin.post(
+ "/file/aaaaaaaaaaaaamztaaaaaaaaam/delete",
data={
- 'editgroup_id': eg.editgroup_id,
+ "editgroup_id": eg.editgroup_id,
},
- follow_redirects=True)
+ follow_redirects=True,
+ )
assert rv.status_code == 200
# NOTE: did not *accept* the deletion edit
+
DUMMY_DEMO_ENTITIES = {
- 'container': 'aaaaaaaaaaaaaeiraaaaaaaaam',
- 'creator': 'aaaaaaaaaaaaaircaaaaaaaaaq',
- 'file': 'aaaaaaaaaaaaamztaaaaaaaaam',
- 'fileset': 'aaaaaaaaaaaaaztgaaaaaaaaai',
- 'webcapture': 'aaaaaaaaaaaaa53xaaaaaaaaai',
- 'release': 'aaaaaaaaaaaaarceaaaaaaaaai',
- 'work': 'aaaaaaaaaaaaavkvaaaaaaaaai',
+ "container": "aaaaaaaaaaaaaeiraaaaaaaaam",
+ "creator": "aaaaaaaaaaaaaircaaaaaaaaaq",
+ "file": "aaaaaaaaaaaaamztaaaaaaaaam",
+ "fileset": "aaaaaaaaaaaaaztgaaaaaaaaai",
+ "webcapture": "aaaaaaaaaaaaa53xaaaaaaaaai",
+ "release": "aaaaaaaaaaaaarceaaaaaaaaai",
+ "work": "aaaaaaaaaaaaavkvaaaaaaaaai",
}
+
def test_web_edit_get(app_admin):
# these are all existing entities
- for entity_type in ['release', 'file', 'container']:
- rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit')
+ for entity_type in ["release", "file", "container"]:
+ rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit")
assert rv.status_code == 200
- if entity_type == 'release':
- assert b'A bigger example' in rv.data
- elif entity_type == 'file':
- assert b'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362' in rv.data
- elif entity_type == 'container':
- assert b'1549-1277' in rv.data
-
- rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml')
+ if entity_type == "release":
+ assert b"A bigger example" in rv.data
+ elif entity_type == "file":
+ assert (
+ b"ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" in rv.data
+ )
+ elif entity_type == "container":
+ assert b"1549-1277" in rv.data
+
+ rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml")
assert rv.status_code == 200
- if entity_type == 'release':
- assert b'A bigger example' in rv.data
- elif entity_type == 'file':
- assert b'ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362' in rv.data
- elif entity_type == 'container':
- assert b'1549-1277' in rv.data
+ if entity_type == "release":
+ assert b"A bigger example" in rv.data
+ elif entity_type == "file":
+ assert (
+ b"ffc1005680cb620eec4c913437dfabbf311b535cfe16cbaeb2faec1f92afc362" in rv.data
+ )
+ elif entity_type == "container":
+ assert b"1549-1277" in rv.data
# TOML-only endpoints
- for entity_type in ['creator', 'fileset', 'webcapture', 'work']:
- rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit')
+ for entity_type in ["creator", "fileset", "webcapture", "work"]:
+ rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit")
assert rv.status_code == 302
- rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml')
+ rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/edit/toml")
assert rv.status_code == 200
def test_web_create_get(app_admin):
- for entity_type in ['release', 'file', 'container']:
- rv = app_admin.get(f'/{entity_type}/create')
+ for entity_type in ["release", "file", "container"]:
+ rv = app_admin.get(f"/{entity_type}/create")
assert rv.status_code == 200
- rv = app_admin.get(f'/{entity_type}/create/toml')
+ rv = app_admin.get(f"/{entity_type}/create/toml")
assert rv.status_code == 200
# these are TOML only
- for entity_type in ['creator', 'fileset', 'webcapture', 'work']:
- rv = app_admin.get(f'/{entity_type}/create')
+ for entity_type in ["creator", "fileset", "webcapture", "work"]:
+ rv = app_admin.get(f"/{entity_type}/create")
assert rv.status_code == 302
- rv = app_admin.get(f'/{entity_type}/create/toml')
+ rv = app_admin.get(f"/{entity_type}/create/toml")
assert rv.status_code == 200
+
def test_web_edit_delete(app_admin):
for entity_type in DUMMY_DEMO_ENTITIES.keys():
- rv = app_admin.get(f'/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/delete')
+ rv = app_admin.get(f"/{entity_type}/{DUMMY_DEMO_ENTITIES[entity_type]}/delete")
assert rv.status_code == 200
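
The pattern throughout web_editing.py is the same: POST form fields to a create endpoint with follow_redirects=True, then assert on the final status code and on byte-string membership in rv.data. A condensed sketch of the happy path, assuming the app_admin (authenticated Flask test client), api, and quick_eg helpers from fixtures.py; the DOI is an arbitrary example value:

def test_release_create_sketch(app_admin, api):
    eg = quick_eg(api)  # fresh editgroup to attach the new edit to
    rv = app_admin.post(
        "/release/create",
        data={
            "editgroup_id": eg.editgroup_id,
            "release_type": "article-journal",
            "release_stage": "published",
            "title": "something bogus",
            "doi": "10.1234/999999",
        },
        follow_redirects=True,  # chase the 302 so rv holds the rendered entity page
    )
    assert rv.status_code == 200
    # content checks are byte-string membership tests against the response body
    assert b"10.1234/999999" in rv.data
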
diff --git a/python/tests/web_editor.py b/python/tests/web_editor.py
index 0d0679bb..b7c249ba 100644
--- a/python/tests/web_editor.py
+++ b/python/tests/web_editor.py
@@ -1,32 +1,35 @@
-
from fixtures import *
def test_change_username(app_admin):
# these tests aren't supposed to mutate database
- rv = app_admin.post('/auth/change_username', data={'username': 'admin-tmp'},
- follow_redirects=True)
+ rv = app_admin.post(
+ "/auth/change_username", data={"username": "admin-tmp"}, follow_redirects=True
+ )
assert rv.status_code == 200
- rv = app_admin.get('/auth/account')
- assert b'admin-tmp' in rv.data
+ rv = app_admin.get("/auth/account")
+ assert b"admin-tmp" in rv.data
- rv = app_admin.post('/auth/change_username', data={'username': 'claire'},
- follow_redirects=True)
+ rv = app_admin.post(
+ "/auth/change_username", data={"username": "claire"}, follow_redirects=True
+ )
assert rv.status_code == 400
- rv = app_admin.get('/auth/account')
- assert b'admin-tmp' in rv.data
+ rv = app_admin.get("/auth/account")
+ assert b"admin-tmp" in rv.data
- rv = app_admin.post('/auth/change_username', data={'username': 'admin'},
- follow_redirects=True)
+ rv = app_admin.post(
+ "/auth/change_username", data={"username": "admin"}, follow_redirects=True
+ )
assert rv.status_code == 200
- rv = app_admin.get('/auth/account')
- assert b'admin-tmp' not in rv.data
+ rv = app_admin.get("/auth/account")
+ assert b"admin-tmp" not in rv.data
+
def test_username_redirect(app_admin):
- rv = app_admin.get('/u/admin')
+ rv = app_admin.get("/u/admin")
assert rv.status_code == 302
- rv = app_admin.get('/u/bogus-not-registered')
+ rv = app_admin.get("/u/bogus-not-registered")
assert rv.status_code == 404
diff --git a/python/tests/web_entity_views.py b/python/tests/web_entity_views.py
index 0d51f87e..4a3a89f8 100644
--- a/python/tests/web_entity_views.py
+++ b/python/tests/web_entity_views.py
@@ -1,4 +1,3 @@
-
import json
from fixtures import *
@@ -6,30 +5,30 @@ from fixtures import *
from fatcat_web.forms import ContainerEntityForm, FileEntityForm, ReleaseEntityForm
DUMMY_DEMO_ENTITIES = {
- 'container': ('aaaaaaaaaaaaaeiraaaaaaaaai', '00000000-0000-0000-1111-fff000000002'),
+ "container": ("aaaaaaaaaaaaaeiraaaaaaaaai", "00000000-0000-0000-1111-fff000000002"),
# note inconsistency here (q not i)
- 'creator': ('aaaaaaaaaaaaaircaaaaaaaaaq', '00000000-0000-0000-2222-fff000000002'),
- 'file': ('aaaaaaaaaaaaamztaaaaaaaaai', '00000000-0000-0000-3333-fff000000002'),
- 'fileset': ('aaaaaaaaaaaaaztgaaaaaaaaai', '00000000-0000-0000-6666-fff000000002'),
- 'webcapture': ('aaaaaaaaaaaaa53xaaaaaaaaai', '00000000-0000-0000-7777-fff000000002'),
- 'release': ('aaaaaaaaaaaaarceaaaaaaaaai', '00000000-0000-0000-4444-fff000000002'),
- 'work': ('aaaaaaaaaaaaavkvaaaaaaaaai', '00000000-0000-0000-5555-fff000000002'),
+ "creator": ("aaaaaaaaaaaaaircaaaaaaaaaq", "00000000-0000-0000-2222-fff000000002"),
+ "file": ("aaaaaaaaaaaaamztaaaaaaaaai", "00000000-0000-0000-3333-fff000000002"),
+ "fileset": ("aaaaaaaaaaaaaztgaaaaaaaaai", "00000000-0000-0000-6666-fff000000002"),
+ "webcapture": ("aaaaaaaaaaaaa53xaaaaaaaaai", "00000000-0000-0000-7777-fff000000002"),
+ "release": ("aaaaaaaaaaaaarceaaaaaaaaai", "00000000-0000-0000-4444-fff000000002"),
+ "work": ("aaaaaaaaaaaaavkvaaaaaaaaai", "00000000-0000-0000-5555-fff000000002"),
}
REALISTIC_DEMO_ENTITIES = {
- 'container': 'aaaaaaaaaaaaaeiraaaaaaaaam',
- 'creator': 'aaaaaaaaaaaaaircaaaaaaaaam',
- 'file': 'aaaaaaaaaaaaamztaaaaaaaaam',
- 'fileset': 'aaaaaaaaaaaaaztgaaaaaaaaam',
- 'webcapture': 'aaaaaaaaaaaaa53xaaaaaaaaam',
- 'release': 'aaaaaaaaaaaaarceaaaaaaaaam',
- 'work': 'aaaaaaaaaaaaavkvaaaaaaaaam',
+ "container": "aaaaaaaaaaaaaeiraaaaaaaaam",
+ "creator": "aaaaaaaaaaaaaircaaaaaaaaam",
+ "file": "aaaaaaaaaaaaamztaaaaaaaaam",
+ "fileset": "aaaaaaaaaaaaaztgaaaaaaaaam",
+ "webcapture": "aaaaaaaaaaaaa53xaaaaaaaaam",
+ "release": "aaaaaaaaaaaaarceaaaaaaaaam",
+ "work": "aaaaaaaaaaaaavkvaaaaaaaaam",
}
def test_entity_basics(app, mocker):
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
# these are basic ES stats for the container view pages
es_raw.side_effect = [
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
@@ -38,40 +37,43 @@ def test_entity_basics(app, mocker):
for entity_type, (ident, revision) in DUMMY_DEMO_ENTITIES.items():
# good requests
- rv = app.get('/{}/{}'.format(entity_type, ident))
+ rv = app.get("/{}/{}".format(entity_type, ident))
assert rv.status_code == 200
- rv = app.get('/{}_{}'.format(entity_type, ident))
+ rv = app.get("/{}_{}".format(entity_type, ident))
assert rv.status_code == 302
- rv = app.get('/{}/{}/history'.format(entity_type, ident))
+ rv = app.get("/{}/{}/history".format(entity_type, ident))
assert rv.status_code == 200
- rv = app.get('/{}/{}/metadata'.format(entity_type, ident))
+ rv = app.get("/{}/{}/metadata".format(entity_type, ident))
assert rv.status_code == 200
- rv = app.get('/{}/rev/{}'.format(entity_type, revision))
+ rv = app.get("/{}/rev/{}".format(entity_type, revision))
assert rv.status_code == 200
- rv = app.get('/{}/rev/{}_something'.format(entity_type, revision))
+ rv = app.get("/{}/rev/{}_something".format(entity_type, revision))
assert rv.status_code == 404
- rv = app.get('/{}/rev/{}/metadata'.format(entity_type, revision))
+ rv = app.get("/{}/rev/{}/metadata".format(entity_type, revision))
assert rv.status_code == 200
- print('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}'.format(entity_type, ident))
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}'.format(entity_type, ident))
+ print("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}".format(entity_type, ident))
+ rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}".format(entity_type, ident))
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}/metadata'.format(entity_type, ident))
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/{}/{}/metadata".format(entity_type, ident)
+ )
assert rv.status_code == 200
# bad requests
- rv = app.get('/{}/9999999999'.format(entity_type))
+ rv = app.get("/{}/9999999999".format(entity_type))
assert rv.status_code == 404
- rv = app.get('/{}/9999999999/history'.format(entity_type))
+ rv = app.get("/{}/9999999999/history".format(entity_type))
assert rv.status_code == 404
- rv = app.get('/{}/f1f046a3-45c9-ffff-ffff-ffffffffffff'.format(entity_type))
+ rv = app.get("/{}/f1f046a3-45c9-ffff-ffff-ffffffffffff".format(entity_type))
assert rv.status_code == 404
- rv = app.get('/{}/rev/f1f046a3-45c9-ffff-ffff-fffffffff'.format(entity_type))
+ rv = app.get("/{}/rev/f1f046a3-45c9-ffff-ffff-fffffffff".format(entity_type))
assert rv.status_code == 404
- rv = app.get('/{}/ccccccccccccccccccccccccca'.format(entity_type))
+ rv = app.get("/{}/ccccccccccccccccccccccccca".format(entity_type))
assert rv.status_code == 404
# TODO: redirects and deleted entities
+
def test_web_deleted_release(app, api):
# specific regression test for view of a deleted release
@@ -91,109 +93,113 @@ def test_web_deleted_release(app, api):
r2 = api.get_release(r1edit.ident)
assert r2.state == "deleted"
- rv = app.get('/release/{}'.format(r2.ident))
+ rv = app.get("/release/{}".format(r2.ident))
assert rv.status_code == 200
- rv = app.get('/release/{}/metadata'.format(r2.ident))
+ rv = app.get("/release/{}/metadata".format(r2.ident))
assert rv.status_code == 200
- rv = app.get('/release/{}/history'.format(r2.ident))
+ rv = app.get("/release/{}/history".format(r2.ident))
assert rv.status_code == 200
def test_lookups(app):
- rv = app.get('/container/lookup')
+ rv = app.get("/container/lookup")
assert rv.status_code == 200
- rv = app.get('/container/lookup?issnl=9999-9999')
+ rv = app.get("/container/lookup?issnl=9999-9999")
assert rv.status_code == 404
- rv = app.get('/container/lookup?issnl=1234-5678')
+ rv = app.get("/container/lookup?issnl=1234-5678")
assert rv.status_code == 302
- rv = app.get('/creator/lookup')
+ rv = app.get("/creator/lookup")
assert rv.status_code == 200
- rv = app.get('/creator/lookup?orcid=0000-0003-2088-7465')
+ rv = app.get("/creator/lookup?orcid=0000-0003-2088-7465")
assert rv.status_code == 302
- rv = app.get('/creator/lookup?orcid=0000-0003-2088-0000')
+ rv = app.get("/creator/lookup?orcid=0000-0003-2088-0000")
assert rv.status_code == 404
- rv = app.get('/file/lookup')
+ rv = app.get("/file/lookup")
assert rv.status_code == 200
- rv = app.get('/file/lookup?sha1=7d97e98f8af710c7e7fe703abc8f639e0ee507c4')
+ rv = app.get("/file/lookup?sha1=7d97e98f8af710c7e7fe703abc8f639e0ee507c4")
assert rv.status_code == 302
- rv = app.get('/file/lookup?sha1=7d97e98f8af710c7e7f00000000000000ee507c4')
+ rv = app.get("/file/lookup?sha1=7d97e98f8af710c7e7f00000000000000ee507c4")
assert rv.status_code == 404
- rv = app.get('/fileset/lookup')
+ rv = app.get("/fileset/lookup")
assert rv.status_code == 404
- rv = app.get('/webcapture/lookup')
+ rv = app.get("/webcapture/lookup")
assert rv.status_code == 404
- rv = app.get('/release/lookup')
+ rv = app.get("/release/lookup")
assert rv.status_code == 200
- rv = app.get('/release/lookup?doi=10.123/abc')
+ rv = app.get("/release/lookup?doi=10.123/abc")
assert rv.status_code == 302
- rv = app.get('/release/lookup?doi=10.123%2Fabc')
+ rv = app.get("/release/lookup?doi=10.123%2Fabc")
assert rv.status_code == 302
- rv = app.get('/release/lookup?doi=abcde')
+ rv = app.get("/release/lookup?doi=abcde")
assert rv.status_code == 400
- rv = app.get('/release/lookup?doi=10.1234/uuu')
+ rv = app.get("/release/lookup?doi=10.1234/uuu")
assert rv.status_code == 404
- rv = app.get('/work/lookup')
+ rv = app.get("/work/lookup")
assert rv.status_code == 404
def test_web_container(app, mocker):
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
# these are basic ES stats for the container view pages
es_raw.side_effect = [
(200, {}, json.dumps(ES_CONTAINER_STATS_RESP)),
(200, {}, json.dumps(ES_CONTAINER_RANDOM_RESP)),
]
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata")
assert rv.status_code == 200
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit')
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/container/create')
+ rv = app.get("/container/create")
assert rv.status_code == 302
- rv = app.get('/container/rev/00000000-0000-0000-1111-fff000000002')
+ rv = app.get("/container/rev/00000000-0000-0000-1111-fff000000002")
assert rv.status_code == 200
- rv = app.get('/container/rev/00000000-0000-0000-1111-fff000000002/metadata')
+ rv = app.get("/container/rev/00000000-0000-0000-1111-fff000000002/metadata")
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai')
+ rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata')
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/metadata"
+ )
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit')
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit"
+ )
assert rv.status_code == 302
def test_web_container_login(full_app, app_admin):
- rv = app_admin.get('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit')
+ rv = app_admin.get("/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit")
assert rv.status_code == 200
- assert b'Journal of Trivial Results' in rv.data
- assert b'1234-5678' in rv.data
- rv = app_admin.get('/container/create')
+ assert b"Journal of Trivial Results" in rv.data
+ assert b"1234-5678" in rv.data
+ rv = app_admin.get("/container/create")
assert rv.status_code == 200
# creation (via form)
with full_app.test_request_context():
form = ContainerEntityForm()
form.issnl.data = "invalid-issn"
- rv = app_admin.post('/container/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/container/create", data=form.data, follow_redirects=True)
assert rv.status_code == 400
- assert b'invalid-issn' in rv.data
+ assert b"invalid-issn" in rv.data
with full_app.test_request_context():
form = ContainerEntityForm()
# these fields are required
form.name.data = "Journal of Experiments"
- rv = app_admin.post('/container/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/container/create", data=form.data, follow_redirects=True)
assert rv.status_code == 200
assert b"Journal of Experiments" in rv.data
@@ -201,139 +207,148 @@ def test_web_container_login(full_app, app_admin):
with full_app.test_request_context():
form = ContainerEntityForm()
form.issnl.data = "invalid-issn"
- rv = app_admin.post('/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit',
- data=form.data, follow_redirects=True)
+ rv = app_admin.post(
+ "/container/aaaaaaaaaaaaaeiraaaaaaaaai/edit", data=form.data, follow_redirects=True
+ )
assert rv.status_code == 400
- assert b'invalid-issn' in rv.data
+ assert b"invalid-issn" in rv.data
def test_web_creator(app):
# not logged in
- rv = app.get('/creator/aaaaaaaaaaaaaircaaaaaaaaai')
+ rv = app.get("/creator/aaaaaaaaaaaaaircaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/creator/aaaaaaaaaaaaaircaaaaaaaaai/edit')
+ rv = app.get("/creator/aaaaaaaaaaaaaircaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/creator/create')
+ rv = app.get("/creator/create")
assert rv.status_code == 302
def test_web_file(app):
# not logged in
- rv = app.get('/file/aaaaaaaaaaaaamztaaaaaaaaai')
+ rv = app.get("/file/aaaaaaaaaaaaamztaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit')
+ rv = app.get("/file/aaaaaaaaaaaaamztaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/file/create')
+ rv = app.get("/file/create")
assert rv.status_code == 302
def test_web_file_login(full_app, app_admin):
- rv = app_admin.get('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit')
+ rv = app_admin.get("/file/aaaaaaaaaaaaamztaaaaaaaaai/edit")
assert rv.status_code == 200
- assert b'7d97e98f8af710c7e7fe703abc8f639e0ee507c4' in rv.data
- assert b'archive.org/robots.txt' in rv.data
- rv = app_admin.get('/file/create')
+ assert b"7d97e98f8af710c7e7fe703abc8f639e0ee507c4" in rv.data
+ assert b"archive.org/robots.txt" in rv.data
+ rv = app_admin.get("/file/create")
assert rv.status_code == 200
# creation (via form)
with full_app.test_request_context():
form = FileEntityForm()
form.sha1.data = "invalidstring"
- rv = app_admin.post('/file/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/file/create", data=form.data, follow_redirects=True)
assert rv.status_code == 400
- assert b'invalidstring' in rv.data
+ assert b"invalidstring" in rv.data
with full_app.test_request_context():
form = FileEntityForm()
# these fields are required
form.size.data = 1234
form.sha1.data = "202f899638fcaa97128b968a43a8f45e00b69a25"
- rv = app_admin.post('/file/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/file/create", data=form.data, follow_redirects=True)
assert rv.status_code == 200
# editing (via form)
with full_app.test_request_context():
form = FileEntityForm()
form.md5.data = "invalidstring"
- rv = app_admin.post('/file/aaaaaaaaaaaaamztaaaaaaaaai/edit',
- data=form.data, follow_redirects=True)
+ rv = app_admin.post(
+ "/file/aaaaaaaaaaaaamztaaaaaaaaai/edit", data=form.data, follow_redirects=True
+ )
assert rv.status_code == 400
- assert b'invalidstring' in rv.data
+ assert b"invalidstring" in rv.data
+
def test_web_fileset(app):
# not logged in
- rv = app.get('/fileset/aaaaaaaaaaaaaztgaaaaaaaaai')
+ rv = app.get("/fileset/aaaaaaaaaaaaaztgaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/fileset/aaaaaaaaaaaaaztgaaaaaaaaai/edit')
+ rv = app.get("/fileset/aaaaaaaaaaaaaztgaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/fileset/create')
+ rv = app.get("/fileset/create")
assert rv.status_code == 302
def test_web_webcatpure(app):
# not logged in
- rv = app.get('/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai')
+ rv = app.get("/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai/edit')
+ rv = app.get("/webcapture/aaaaaaaaaaaaa53xaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/webcapture/create')
+ rv = app.get("/webcapture/create")
assert rv.status_code == 302
def test_web_release(app):
# not logged in
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs")
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/references')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/references")
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata")
assert rv.status_code == 200
- rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/contribs')
+ rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/contribs")
assert rv.status_code == 200
- rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/references')
+ rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/references")
assert rv.status_code == 200
- rv = app.get('/release/rev/00000000-0000-0000-4444-fff000000002/metadata')
+ rv = app.get("/release/rev/00000000-0000-0000-4444-fff000000002/metadata")
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai')
+ rv = app.get("/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs')
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/contribs"
+ )
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/references')
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/references"
+ )
assert rv.status_code == 200
- rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata')
+ rv = app.get(
+ "/editgroup/aaaaaaaaaaaabo53aaaaaaaaaq/release/aaaaaaaaaaaaarceaaaaaaaaai/metadata"
+ )
assert rv.status_code == 200
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/edit')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/release/create')
+ rv = app.get("/release/create")
assert rv.status_code == 302
def test_web_release_login(full_app, app_admin):
- rv = app_admin.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/edit')
+ rv = app_admin.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/edit")
assert rv.status_code == 200
- assert b'robin hood' in rv.data
- assert b'PMC555' in rv.data
- rv = app_admin.get('/release/create')
+ assert b"robin hood" in rv.data
+ assert b"PMC555" in rv.data
+ rv = app_admin.get("/release/create")
assert rv.status_code == 200
# creation (via form)
with full_app.test_request_context():
form = ReleaseEntityForm()
form.title.data = "My Research: Missing Some Stuff"
- rv = app_admin.post('/release/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/release/create", data=form.data, follow_redirects=True)
assert rv.status_code == 400
- assert b'My Research: Missing Some Stuff' in rv.data
- assert b'already' not in rv.data
+ assert b"My Research: Missing Some Stuff" in rv.data
+ assert b"already" not in rv.data
with full_app.test_request_context():
form = ReleaseEntityForm()
@@ -341,7 +356,7 @@ def test_web_release_login(full_app, app_admin):
form.title.data = "Creating Releases: A Review"
form.release_type.data = "article-journal"
form.release_stage.data = "draft"
- rv = app_admin.post('/release/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/release/create", data=form.data, follow_redirects=True)
assert rv.status_code == 200
with full_app.test_request_context():
@@ -352,33 +367,33 @@ def test_web_release_login(full_app, app_admin):
form.release_stage.data = "draft"
# already merged editgroup
form.editgroup_id.data = "aaaaaaaaaaaabo53aaaaaaaaae"
- rv = app_admin.post('/release/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/release/create", data=form.data, follow_redirects=True)
assert rv.status_code == 400
# XXX: this should return the page with error annotated, not generic
# 400 page
- #assert b"already accepted" in rv.data
+ # assert b"already accepted" in rv.data
# editing
with full_app.test_request_context():
form = ReleaseEntityForm()
form.title.data = "My Research: Missing Some Stuff"
form.release_type.data = "bogus-release-type"
- rv = app_admin.post('/release/create', data=form.data, follow_redirects=True)
+ rv = app_admin.post("/release/create", data=form.data, follow_redirects=True)
assert rv.status_code == 400
- assert b'My Research: Missing Some Stuff' in rv.data
+ assert b"My Research: Missing Some Stuff" in rv.data
def test_web_search(app):
- rv = app.get('/release/search')
+ rv = app.get("/release/search")
assert rv.status_code == 200
def test_web_work(app):
- rv = app.get('/work/aaaaaaaaaaaaavkvaaaaaaaaai')
+ rv = app.get("/work/aaaaaaaaaaaaavkvaaaaaaaaai")
assert rv.status_code == 200
- rv = app.get('/work/aaaaaaaaaaaaavkvaaaaaaaaai/edit')
+ rv = app.get("/work/aaaaaaaaaaaaavkvaaaaaaaaai/edit")
assert rv.status_code == 302
- rv = app.get('/work/create')
+ rv = app.get("/work/create")
assert rv.status_code == 302
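
The form-based creation and editing tests above all follow one recipe: build the WTForms object inside a request context, set only the fields under test, and POST form.data through the test client. A minimal sketch under the same assumptions as the source (full_app is the Flask application, app_admin a logged-in test client, and ContainerEntityForm the form imported at the top of this file):

from fatcat_web.forms import ContainerEntityForm


def test_container_form_post_sketch(full_app, app_admin):
    # a request context is required so the WTForms form can bind itself
    with full_app.test_request_context():
        form = ContainerEntityForm()
        form.name.data = "Journal of Experiments"  # the one required field
        # form.data is a plain dict of field values, suitable for a test-client POST
        rv = app_admin.post("/container/create", data=form.data, follow_redirects=True)
        assert rv.status_code == 200
        assert b"Journal of Experiments" in rv.data
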
diff --git a/python/tests/web_refs.py b/python/tests/web_refs.py
index 0ab69607..61966480 100644
--- a/python/tests/web_refs.py
+++ b/python/tests/web_refs.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -10,14 +9,14 @@ from fatcat_web.search import get_elastic_container_random_releases
def test_basic_refs(app, mocker):
- with open('tests/files/elastic_refs_in_release.json') as f:
+ with open("tests/files/elastic_refs_in_release.json") as f:
elastic_resp_in = json.loads(f.read())
- with open('tests/files/elastic_refs_out_release.json') as f:
+ with open("tests/files/elastic_refs_out_release.json") as f:
elastic_resp_out = json.loads(f.read())
- with open('tests/files/elastic_empty.json') as f:
+ with open("tests/files/elastic_empty.json") as f:
elastic_resp_empty = json.loads(f.read())
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp_in)),
(200, {}, json.dumps(elastic_resp_in)),
@@ -28,27 +27,27 @@ def test_basic_refs(app, mocker):
]
# render refs-in
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in")
assert rv.status_code == 200
assert b"Why Most Published Research Findings Are False" in rv.data
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in.json")
assert rv.status_code == 200
# empty (in)
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-in")
assert rv.status_code == 200
assert b"No References Found" in rv.data
# render refs-out
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out")
assert rv.status_code == 200
assert b"Why Most Published Research Findings Are False" in rv.data
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out.json")
assert rv.status_code == 200
# empty (out)
- rv = app.get('/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out')
+ rv = app.get("/release/aaaaaaaaaaaaarceaaaaaaaaai/refs-out")
assert rv.status_code == 200
assert b"No References Found" in rv.data
diff --git a/python/tests/web_routes.py b/python/tests/web_routes.py
index 0edf06d1..3c4ad946 100644
--- a/python/tests/web_routes.py
+++ b/python/tests/web_routes.py
@@ -1,10 +1,8 @@
-
from fixtures import *
def test_static_routes(app):
- for route in ('/health.json', '/robots.txt', '/', '/about', '/rfc',
- '/static/fatcat.jpg'):
+ for route in ("/health.json", "/robots.txt", "/", "/about", "/rfc", "/static/fatcat.jpg"):
rv = app.get(route)
assert rv.status_code == 200
diff --git a/python/tests/web_search.py b/python/tests/web_search.py
index fdcffdea..47cd4057 100644
--- a/python/tests/web_search.py
+++ b/python/tests/web_search.py
@@ -1,4 +1,3 @@
-
import json
import pytest
@@ -10,68 +9,71 @@ from fatcat_web.search import get_elastic_container_random_releases
def test_generic_search(app):
- rv = app.get('/search?q=blood')
+ rv = app.get("/search?q=blood")
assert rv.status_code == 302
assert "/release/search" in rv.location
# file sha1sum
- rv = app.get('/search?q=0262d5351e8e7a0af27af8ceaf7b4e581da085f2')
+ rv = app.get("/search?q=0262d5351e8e7a0af27af8ceaf7b4e581da085f2")
assert rv.status_code == 302
assert "/file/lookup" in rv.location
# PMCID
- rv = app.get('/search?q=PMC12345')
+ rv = app.get("/search?q=PMC12345")
assert rv.status_code == 302
assert "/release/lookup" in rv.location
# ISSN
- rv = app.get('/search?q=1234-5678')
+ rv = app.get("/search?q=1234-5678")
assert rv.status_code == 302
assert "/container/lookup" in rv.location
+
def test_release_search(app, mocker):
- rv = app.get('/release/search')
+ rv = app.get("/release/search")
assert rv.status_code == 200
- with open('tests/files/elastic_release_search.json') as f:
- elastic_resp=json.loads(f.read())
+ with open("tests/files/elastic_release_search.json") as f:
+ elastic_resp = json.loads(f.read())
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp)),
]
- rv = app.get('/release/search?q=blood')
+ rv = app.get("/release/search?q=blood")
assert rv.status_code == 200
assert b"Showing" in rv.data
assert b"Quantum Studies of Acetylene Adsorption on Ice Surface" in rv.data
+
def test_container_search(app, mocker):
- rv = app.get('/container/search')
+ rv = app.get("/container/search")
assert rv.status_code == 200
- with open('tests/files/elastic_container_search.json') as f:
- elastic_resp=json.loads(f.read())
+ with open("tests/files/elastic_container_search.json") as f:
+ elastic_resp = json.loads(f.read())
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp)),
]
- rv = app.get('/container/search?q=blood')
+ rv = app.get("/container/search?q=blood")
assert rv.status_code == 200
assert b"Showing" in rv.data
assert b"European Instructional Course Lectures" in rv.data
assert b"British Editorial Society of Bone and Joint Surger" in rv.data
+
def test_random_releases(app, mocker):
- with open('tests/files/elastic_release_search.json') as f:
- elastic_resp=json.loads(f.read())
+ with open("tests/files/elastic_release_search.json") as f:
+ elastic_resp = json.loads(f.read())
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp)),
]
@@ -81,101 +83,120 @@ def test_random_releases(app, mocker):
elastic_resp1 = {
- 'timed_out': False,
- 'aggregations': {
- 'release_ref_count': {'value': 8031459}},
- 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 0
+ "timed_out": False,
+ "aggregations": {"release_ref_count": {"value": 8031459}},
+ "hits": {"total": 80578584, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 0,
}
elastic_resp2 = {
- 'timed_out': False,
- 'aggregations': {
- 'paper_like': {'buckets': {
- 'is_oa': {'doc_count': 8031459},
- 'in_kbart': {'doc_count': 51594200},
- 'in_web': {'doc_count': 10925092},
- 'in_web_not_kbart': {'doc_count': 5160359}}}},
- 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 0
+ "timed_out": False,
+ "aggregations": {
+ "paper_like": {
+ "buckets": {
+ "is_oa": {"doc_count": 8031459},
+ "in_kbart": {"doc_count": 51594200},
+ "in_web": {"doc_count": 10925092},
+ "in_web_not_kbart": {"doc_count": 5160359},
+ }
+ }
+ },
+ "hits": {"total": 80578584, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 0,
}
elastic_resp3 = {
- 'timed_out': False,
- 'hits': {'total': 80578584, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 0
+ "timed_out": False,
+ "hits": {"total": 80578584, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 0,
}
+
def test_stats(app, mocker):
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp1)),
(200, {}, json.dumps(elastic_resp2)),
(200, {}, json.dumps(elastic_resp3)),
]
- rv = app.get('/stats')
+ rv = app.get("/stats")
assert rv.status_code == 200
assert b"80,578,584" in rv.data
+
def test_stats_json(app, mocker):
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp1)),
(200, {}, json.dumps(elastic_resp2)),
(200, {}, json.dumps(elastic_resp3)),
]
- rv = app.get('/stats.json')
+ rv = app.get("/stats.json")
assert rv.status_code == 200
- assert rv.json['papers']['in_kbart'] == 51594200
- assert rv.json['release']['refs_total'] == 8031459
+ assert rv.json["papers"]["in_kbart"] == 51594200
+ assert rv.json["release"]["refs_total"] == 8031459
+
def test_container_stats(app, mocker):
elastic_resp = {
- 'timed_out': False,
- 'aggregations': {
- 'container_stats': {'buckets': {
- 'is_preserved': {'doc_count': 461939},
- 'in_kbart': {'doc_count': 461939},
- 'in_web': {'doc_count': 2797}}},
- 'preservation': {
- 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0,
- 'buckets': [{'key': 'bright', 'doc_count': 4143}, {'key': 'none',
- 'doc_count': 101}, {'key': 'dark', 'doc_count': 79}, {'key':
- 'shadows_only', 'doc_count': 5}]},
- 'release_type': {
- 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0,
- 'buckets': [{'key': 'article-journal', 'doc_count': 4324}, {'key':
- 'article', 'doc_count': 2}, {'key': '_unknown', 'doc_count': 1},
- {'key': 'editorial', 'doc_count': 1}]},
+ "timed_out": False,
+ "aggregations": {
+ "container_stats": {
+ "buckets": {
+ "is_preserved": {"doc_count": 461939},
+ "in_kbart": {"doc_count": 461939},
+ "in_web": {"doc_count": 2797},
+ }
+ },
+ "preservation": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {"key": "bright", "doc_count": 4143},
+ {"key": "none", "doc_count": 101},
+ {"key": "dark", "doc_count": 79},
+ {"key": "shadows_only", "doc_count": 5},
+ ],
+ },
+ "release_type": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {"key": "article-journal", "doc_count": 4324},
+ {"key": "article", "doc_count": 2},
+ {"key": "_unknown", "doc_count": 1},
+ {"key": "editorial", "doc_count": 1},
+ ],
+ },
},
- 'hits': {'total': 461939, 'hits': [], 'max_score': 0.0},
- '_shards': {'successful': 5, 'total': 5, 'skipped': 0, 'failed': 0},
- 'took': 50
+ "hits": {"total": 461939, "hits": [], "max_score": 0.0},
+ "_shards": {"successful": 5, "total": 5, "skipped": 0, "failed": 0},
+ "took": 50,
}
- es_raw = mocker.patch('elasticsearch.connection.Urllib3HttpConnection.perform_request')
+ es_raw = mocker.patch("elasticsearch.connection.Urllib3HttpConnection.perform_request")
es_raw.side_effect = [
(200, {}, json.dumps(elastic_resp)),
(200, {}, json.dumps(elastic_resp)),
]
- rv = app.get('/container/issnl/1234-5678/stats.json')
+ rv = app.get("/container/issnl/1234-5678/stats.json")
assert rv.status_code == 200
stats = rv.json
- assert isinstance(stats['total'], int)
- assert isinstance(stats['release_type'], dict)
- assert isinstance(stats['preservation']['total'], int)
- assert isinstance(stats['preservation']['bright'], int)
- assert isinstance(stats['preservation']['dark'], int)
- assert isinstance(stats['preservation']['none'], int)
-
- rv = app.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/stats.json')
+ assert isinstance(stats["total"], int)
+ assert isinstance(stats["release_type"], dict)
+ assert isinstance(stats["preservation"]["total"], int)
+ assert isinstance(stats["preservation"]["bright"], int)
+ assert isinstance(stats["preservation"]["dark"], int)
+ assert isinstance(stats["preservation"]["none"], int)
+
+ rv = app.get("/container/aaaaaaaaaaaaaeiraaaaaaaaam/stats.json")
assert rv.status_code == 200
stats = rv.json
- assert isinstance(stats['total'], int)
- assert stats['ident'] == "aaaaaaaaaaaaaeiraaaaaaaaam"
+ assert isinstance(stats["total"], int)
+ assert stats["ident"] == "aaaaaaaaaaaaaeiraaaaaaaaam"