From 7ebda2e051b51e49544ab75673b19ec5f27d9d45 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 12 Nov 2018 23:37:28 -0800 Subject: more python module refactoring --- python/tests/crossref.py | 2 +- python/tests/entity_helpers.py | 15 ------- python/tests/fixtures.py | 8 ++-- python/tests/grobid_metadata_importer.py | 56 --------------------------- python/tests/grobid_metadata_importer_test.py | 56 +++++++++++++++++++++++++++ python/tests/importer.py | 2 +- python/tests/issn.py | 2 +- python/tests/matched_importer.py | 32 --------------- python/tests/matched_importer_test.py | 32 +++++++++++++++ python/tests/orcid.py | 2 +- python/tests/routes.py | 1 - python/tests/transform_tests.py | 15 +++++++ 12 files changed, 111 insertions(+), 112 deletions(-) delete mode 100644 python/tests/entity_helpers.py delete mode 100644 python/tests/grobid_metadata_importer.py create mode 100644 python/tests/grobid_metadata_importer_test.py delete mode 100644 python/tests/matched_importer.py create mode 100644 python/tests/matched_importer_test.py create mode 100644 python/tests/transform_tests.py (limited to 'python/tests') diff --git a/python/tests/crossref.py b/python/tests/crossref.py index 59be9886..04ac5e8e 100644 --- a/python/tests/crossref.py +++ b/python/tests/crossref.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat.crossref_importer import FatcatCrossrefImporter +from fatcat_tools.crossref_importer import FatcatCrossrefImporter @pytest.fixture(scope="function") diff --git a/python/tests/entity_helpers.py b/python/tests/entity_helpers.py deleted file mode 100644 index dd6fa00a..00000000 --- a/python/tests/entity_helpers.py +++ /dev/null @@ -1,15 +0,0 @@ - -import json -import pytest -from fatcat.crossref_importer import FatcatCrossrefImporter -from fatcat.entity_helpers import * - -from crossref import crossref_importer - -def test_elastic_convert(crossref_importer): - with open('tests/files/crossref-works.single.json', 'r') as f: - # not a single line - raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) - r.state = 'active' - release_elastic_dict(r) diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index 6ff05495..6f68cf5c 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -4,14 +4,14 @@ import time import json import signal import pytest -import fatcat +import fatcat_web @pytest.fixture def full_app(): - fatcat.app.testing = True - fatcat.app.debug = False - return fatcat.app + fatcat_web.app.testing = True + fatcat_web.app.debug = False + return fatcat_web.app @pytest.fixture def app(full_app): diff --git a/python/tests/grobid_metadata_importer.py b/python/tests/grobid_metadata_importer.py deleted file mode 100644 index 2c8565aa..00000000 --- a/python/tests/grobid_metadata_importer.py +++ /dev/null @@ -1,56 +0,0 @@ - -import os -import json -import base64 -import pytest -from fatcat.grobid_metadata_importer import FatcatGrobidMetadataImporter - -""" -WARNING: these tests are currently very fragile because they have database -side-effects. Should probably be disabled or re-written. -""" - -@pytest.fixture(scope="function") -def grobid_metadata_importer(): - yield FatcatGrobidMetadataImporter("http://localhost:9411/v0") - -# TODO: use API to check that entities actually created... -#def test_grobid_metadata_importer_batch(grobid_metadata_importer): -# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: -# grobid_metadata_importer.process_batch(f) - -def test_grobid_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - raw = json.loads(f.readline().split('\t')[4]) - re = grobid_metadata_importer.parse_grobid_json(raw) - assert re - assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA" - assert len(re.contribs) == 5 - print(re.contribs) - assert re.contribs[0].raw_name == "Wahyu Ary" - assert re.publisher == None - assert re.extra.get('container_name') == None - assert len(re.refs) == 27 - -def test_file_metadata_parse(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - f.readline() - raw = f.readline().split('\t') - # randomize sha1 so tests are repeatable - random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper()) - fe = grobid_metadata_importer.parse_file_metadata( - random_sha1, json.loads(raw[1]), raw[2], int(raw[3])) - assert fe - #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V" - assert fe.md5 == None - assert fe.mimetype == "application/pdf" - assert fe.size == 142710 - assert fe.urls[1].url.startswith("http://via.library.depaul.edu") - assert fe.urls[1].rel == "web" - assert fe.urls[0].url.startswith("https://web.archive.org/") - assert fe.urls[0].rel == "webarchive" - assert len(fe.releases) == 0 - -def test_grobid_metadata_importer(grobid_metadata_importer): - with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - grobid_metadata_importer.process_source(f) diff --git a/python/tests/grobid_metadata_importer_test.py b/python/tests/grobid_metadata_importer_test.py new file mode 100644 index 00000000..502ca74a --- /dev/null +++ b/python/tests/grobid_metadata_importer_test.py @@ -0,0 +1,56 @@ + +import os +import json +import base64 +import pytest +from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter + +""" +WARNING: these tests are currently very fragile because they have database +side-effects. Should probably be disabled or re-written. +""" + +@pytest.fixture(scope="function") +def grobid_metadata_importer(): + yield FatcatGrobidMetadataImporter("http://localhost:9411/v0") + +# TODO: use API to check that entities actually created... +#def test_grobid_metadata_importer_batch(grobid_metadata_importer): +# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: +# grobid_metadata_importer.process_batch(f) + +def test_grobid_metadata_parse(grobid_metadata_importer): + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + raw = json.loads(f.readline().split('\t')[4]) + re = grobid_metadata_importer.parse_grobid_json(raw) + assert re + assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA" + assert len(re.contribs) == 5 + print(re.contribs) + assert re.contribs[0].raw_name == "Wahyu Ary" + assert re.publisher == None + assert re.extra.get('container_name') == None + assert len(re.refs) == 27 + +def test_file_metadata_parse(grobid_metadata_importer): + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + f.readline() + raw = f.readline().split('\t') + # randomize sha1 so tests are repeatable + random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper()) + fe = grobid_metadata_importer.parse_file_metadata( + random_sha1, json.loads(raw[1]), raw[2], int(raw[3])) + assert fe + #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V" + assert fe.md5 == None + assert fe.mimetype == "application/pdf" + assert fe.size == 142710 + assert fe.urls[1].url.startswith("http://via.library.depaul.edu") + assert fe.urls[1].rel == "web" + assert fe.urls[0].url.startswith("https://web.archive.org/") + assert fe.urls[0].rel == "webarchive" + assert len(fe.releases) == 0 + +def test_grobid_metadata_importer(grobid_metadata_importer): + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + grobid_metadata_importer.process_source(f) diff --git a/python/tests/importer.py b/python/tests/importer.py index 22af37ed..0de86635 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,7 +1,7 @@ import pytest -from fatcat.importer_common import FatcatImporter +from fatcat_tools.importer_common import FatcatImporter def test_issnl_mapping_lookup(): diff --git a/python/tests/issn.py b/python/tests/issn.py index fff112f7..76c8aecb 100644 --- a/python/tests/issn.py +++ b/python/tests/issn.py @@ -1,6 +1,6 @@ import pytest -from fatcat.issn_importer import FatcatIssnImporter +from fatcat_tools.issn_importer import FatcatIssnImporter @pytest.fixture(scope="function") diff --git a/python/tests/matched_importer.py b/python/tests/matched_importer.py deleted file mode 100644 index 9cc6aa32..00000000 --- a/python/tests/matched_importer.py +++ /dev/null @@ -1,32 +0,0 @@ - -import json -import pytest -from fatcat.matched_importer import FatcatMatchedImporter - - -@pytest.fixture(scope="function") -def matched_importer(): - yield FatcatMatchedImporter("http://localhost:9411/v0") - -# TODO: use API to check that entities actually created... -def test_matched_importer_batch(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_batch(f) - -def test_matched_importer(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_source(f) - -def test_matched_dict_parse(matched_importer): - with open('tests/files/example_matched.json', 'r') as f: - raw = json.loads(f.readline()) - f = matched_importer.parse_matched_dict(raw) - assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" - assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" - assert f.mimetype == "application/pdf" - assert f.size == 255629 - assert f.urls[1].url.startswith("http://journals.plos.org") - assert f.urls[1].rel == "web" - assert f.urls[0].url.startswith("https://web.archive.org/") - assert f.urls[0].rel == "webarchive" - assert len(f.releases) == 1 diff --git a/python/tests/matched_importer_test.py b/python/tests/matched_importer_test.py new file mode 100644 index 00000000..4042eabb --- /dev/null +++ b/python/tests/matched_importer_test.py @@ -0,0 +1,32 @@ + +import json +import pytest +from fatcat_tools.matched_importer import FatcatMatchedImporter + + +@pytest.fixture(scope="function") +def matched_importer(): + yield FatcatMatchedImporter("http://localhost:9411/v0") + +# TODO: use API to check that entities actually created... +def test_matched_importer_batch(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.process_batch(f) + +def test_matched_importer(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.process_source(f) + +def test_matched_dict_parse(matched_importer): + with open('tests/files/example_matched.json', 'r') as f: + raw = json.loads(f.readline()) + f = matched_importer.parse_matched_dict(raw) + assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" + assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" + assert f.mimetype == "application/pdf" + assert f.size == 255629 + assert f.urls[1].url.startswith("http://journals.plos.org") + assert f.urls[1].rel == "web" + assert f.urls[0].url.startswith("https://web.archive.org/") + assert f.urls[0].rel == "webarchive" + assert len(f.releases) == 1 diff --git a/python/tests/orcid.py b/python/tests/orcid.py index ae3d0d0b..f8228cc0 100644 --- a/python/tests/orcid.py +++ b/python/tests/orcid.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat.orcid_importer import FatcatOrcidImporter +from fatcat_tools.orcid_importer import FatcatOrcidImporter @pytest.fixture(scope="function") diff --git a/python/tests/routes.py b/python/tests/routes.py index 8607e7c0..2e208d22 100644 --- a/python/tests/routes.py +++ b/python/tests/routes.py @@ -2,7 +2,6 @@ import json import tempfile import pytest -import fatcat from fatcat_client.rest import ApiException from fixtures import * diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py new file mode 100644 index 00000000..669c2526 --- /dev/null +++ b/python/tests/transform_tests.py @@ -0,0 +1,15 @@ + +import json +import pytest +from fatcat_tools.crossref_importer import FatcatCrossrefImporter +from fatcat_tools.transforms import * + +from crossref import crossref_importer + +def test_elastic_convert(crossref_importer): + with open('tests/files/crossref-works.single.json', 'r') as f: + # not a single line + raw = json.loads(f.read()) + (r, c) = crossref_importer.parse_crossref_dict(raw) + r.state = 'active' + release_elastic_dict(r) -- cgit v1.2.3