diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-12 23:37:28 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-12 23:37:28 -0800 |
commit | 7ebda2e051b51e49544ab75673b19ec5f27d9d45 (patch) | |
tree | 671fd995b2fdb0b9ec69cfcb0e7771f43c13d32d /python/tests/grobid_metadata_importer.py | |
parent | 889606ffac5096610e6123134cb89423c15ef464 (diff) | |
download | fatcat-7ebda2e051b51e49544ab75673b19ec5f27d9d45.tar.gz fatcat-7ebda2e051b51e49544ab75673b19ec5f27d9d45.zip |
more python module refactoring
Diffstat (limited to 'python/tests/grobid_metadata_importer.py')
-rw-r--r-- | python/tests/grobid_metadata_importer.py | 56 |
1 files changed, 0 insertions, 56 deletions
diff --git a/python/tests/grobid_metadata_importer.py b/python/tests/grobid_metadata_importer.py deleted file mode 100644 index 2c8565aa..00000000 --- a/python/tests/grobid_metadata_importer.py +++ /dev/null @@ -1,56 +0,0 @@ - -import os -import json -import base64 -import pytest -from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter - -""" -WARNING: these tests are currently very fragile because they have database -side-effects. Should probably be disabled or re-written. -""" - -@pytest.fixture(scope="function") -def grobid_metadata_importer(): - yield FatcatGrobidMetadataImporter("http://localhost:9411/v0") - -# TODO: use API to check that entities actually created... -#def test_grobid_metadata_importer_batch(grobid_metadata_importer): -# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: -# grobid_metadata_importer.process_batch(f) - -def test_grobid_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - raw = json.loads(f.readline().split('\t')[4]) - re = grobid_metadata_importer.parse_grobid_json(raw) - assert re - assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA" - assert len(re.contribs) == 5 - print(re.contribs) - assert re.contribs[0].raw_name == "Wahyu Ary" - assert re.publisher == None - assert re.extra.get('container_name') == None - assert len(re.refs) == 27 - -def test_file_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - f.readline() - raw = f.readline().split('\t') - # randomize sha1 so tests are repeatable - random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper()) - fe = grobid_metadata_importer.parse_file_metadata( - random_sha1, json.loads(raw[1]), raw[2], int(raw[3])) - assert fe - #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V" - assert fe.md5 == None - assert fe.mimetype == "application/pdf" - assert fe.size == 142710 - assert fe.urls[1].url.startswith("http://via.library.depaul.edu") - assert fe.urls[1].rel == "web" - assert fe.urls[0].url.startswith("https://web.archive.org/") - assert fe.urls[0].rel == "webarchive" - assert len(fe.releases) == 0 - -def test_grobid_metadata_importer(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - grobid_metadata_importer.process_source(f) |