about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests/import_grobid_metadata.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 18:13:14 -0700
commit: cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch)
tree: 5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/import_grobid_metadata.py
parent: 78f08280edea4ff65ca613ad30005c45cc48dea6 (diff)
download: fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz
          fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip
fmt (black): tests/
Diffstat (limited to 'python/tests/import_grobid_metadata.py')
-rw-r--r-- python/tests/import_grobid_metadata.py 52
1 files changed, 29 insertions, 23 deletions
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
index 862b310f..a06e7aa4 100644
--- a/python/tests/import_grobid_metadata.py
+++ b/python/tests/import_grobid_metadata.py
@@ -1,4 +1,3 @@
-
import base64
import json
import os
@@ -13,17 +12,21 @@ WARNING: these tests are currently very fragile because they have database
side-effects. Should probably be disabled or re-written.
"""
+
@pytest.fixture(scope="function")
def grobid_metadata_importer(api):
yield GrobidMetadataImporter(api)
def test_grobid_metadata_parse(grobid_metadata_importer):
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
- raw = json.loads(f.readline().split('\t')[4])
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
+ raw = json.loads(f.readline().split("\t")[4])
re = grobid_metadata_importer.parse_grobid_json(raw)
assert re
- assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+ assert (
+ re.title
+ == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+ )
assert len(re.contribs) == 5
print(re.contribs)
assert re.contribs[0].raw_name == "Wahyu Ary"
@@ -31,19 +34,21 @@ def test_grobid_metadata_parse(grobid_metadata_importer):
assert re.contribs[0].surname == "Ary"
assert re.publisher is None
if re.extra:
- assert re.extra.get('container_name') is None
+ assert re.extra.get("container_name") is None
assert len(re.refs) == 27
+
def test_file_metadata_parse(grobid_metadata_importer):
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
f.readline()
- raw = f.readline().split('\t')
+ raw = f.readline().split("\t")
# randomize sha1 so tests are repeatable
- random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
+ random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode("utf-8").upper())
fe = grobid_metadata_importer.parse_file_metadata(
- random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
+ random_sha1, json.loads(raw[1]), raw[2], int(raw[3])
+ )
assert fe
- #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
+ # assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
assert fe.md5 is None
assert fe.mimetype == "application/pdf"
assert fe.size == 142710
@@ -53,29 +58,30 @@ def test_file_metadata_parse(grobid_metadata_importer):
assert fe.urls[0].rel == "webarchive"
assert len(fe.release_ids) == 0
+
def test_grobid_metadata_importer(grobid_metadata_importer):
last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
grobid_metadata_importer.bezerk_mode = True
counts = LinePusher(grobid_metadata_importer, f).run()
- assert counts['insert'] == 10
- assert counts['inserted.release'] == 10
- assert counts['exists'] == 0
- assert counts['skip'] == 0
+ assert counts["insert"] == 10
+ assert counts["inserted.release"] == 10
+ assert counts["exists"] == 0
+ assert counts["skip"] == 0
# fetch most recent editgroup
- change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1)
+ change = grobid_metadata_importer.api.get_changelog_entry(index=last_index + 1)
eg = change.editgroup
assert eg.description
assert "grobid" in eg.description.lower()
- assert eg.extra['git_rev']
- assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent']
+ assert eg.extra["git_rev"]
+ assert "fatcat_tools.GrobidMetadataImporter" in eg.extra["agent"]
- with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ with open("tests/files/example_grobid_metadata_lines.tsv", "r") as f:
grobid_metadata_importer.reset()
grobid_metadata_importer.bezerk_mode = False
counts = LinePusher(grobid_metadata_importer, f).run()
- assert counts['insert'] == 0
- assert counts['inserted.release'] == 0
- assert counts['exists'] == 10
- assert counts['skip'] == 0
+ assert counts["insert"] == 0
+ assert counts["inserted.release"] == 0
+ assert counts["exists"] == 10
+ assert counts["skip"] == 0