From 279b22e30d9b590838268f5f5acdaa1110ee593a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 13 Nov 2018 11:32:41 -0800
Subject: shuffle around fatcat_tools layout

---
 python/tests/crossref.py                      | 57 ---------------------------
 python/tests/grobid_metadata_importer_test.py | 56 --------------------------
 python/tests/import_crossref.py               | 57 +++++++++++++++++++++++++++
 python/tests/import_grobid_metadata.py        | 56 ++++++++++++++++++++++++++
 python/tests/import_issn.py                   | 17 ++++++++
 python/tests/import_matched.py                | 32 +++++++++++++++
 python/tests/import_orcid.py                  | 37 +++++++++++++++++
 python/tests/importer.py                      |  2 +-
 python/tests/issn.py                          | 17 --------
 python/tests/matched_importer_test.py         | 32 ---------------
 python/tests/orcid.py                         | 37 -----------------
 python/tests/transform_tests.py               |  4 +-
 12 files changed, 202 insertions(+), 202 deletions(-)
 delete mode 100644 python/tests/crossref.py
 delete mode 100644 python/tests/grobid_metadata_importer_test.py
 create mode 100644 python/tests/import_crossref.py
 create mode 100644 python/tests/import_grobid_metadata.py
 create mode 100644 python/tests/import_issn.py
 create mode 100644 python/tests/import_matched.py
 create mode 100644 python/tests/import_orcid.py
 delete mode 100644 python/tests/issn.py
 delete mode 100644 python/tests/matched_importer_test.py
 delete mode 100644 python/tests/orcid.py

(limited to 'python/tests')

diff --git a/python/tests/crossref.py b/python/tests/crossref.py
deleted file mode 100644
index 04ac5e8e..00000000
--- a/python/tests/crossref.py
+++ /dev/null
@@ -1,57 +0,0 @@
-
-import json
-import pytest
-from fatcat_tools.crossref_importer import FatcatCrossrefImporter
-
-
-@pytest.fixture(scope="function")
-def crossref_importer():
-    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
-        yield FatcatCrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3')
-
-def test_crossref_importer_batch(crossref_importer):
-    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
-        crossref_importer.process_batch(f)
-
-def test_crossref_importer(crossref_importer):
-    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
-        crossref_importer.process_source(f)
-
-def test_crossref_importer_create(crossref_importer):
-    crossref_importer.create_containers = True
-    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
-        crossref_importer.process_source(f)
-
-def test_crossref_dict_parse(crossref_importer):
-    with open('tests/files/crossref-works.single.json', 'r') as f:
-        # not a single line
-        raw = json.loads(f.read())
-        (r, c) = crossref_importer.parse_crossref_dict(raw)
-        extra = r.extra['crossref']
-        assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
-        assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t"
-        assert r.publisher == "Wiley-Blackwell"
-        print(extra)
-        assert extra['container-title'] == ["International Journal of Quantum Chemistry"]
-        assert r.release_type == "journal-article"
-        assert r.release_status == "published"
-        assert r.isbn13 == "978-3-16-148410-0"
-        assert 'subtitle' not in extra
-        assert 'archive' not in extra
-        assert 'funder' not in extra
-        assert len(r.contribs) == 5
-        assert r.contribs[0].raw_name == "Marcelo D. Radicioni"
-        assert r.contribs[0].index == 0
-        assert r.contribs[1].extra['affiliations'] == ["Some University"]
-        assert r.contribs[1].role == "author"
-        assert r.contribs[3].role == "editor"
-        assert r.contribs[3].index is None
-        assert r.contribs[4].role == "translator"
-        assert r.contribs[4].index is None
-        assert len(r.refs) == 25
-        assert r.refs[0].key == "BIB1"
-        assert r.refs[0].year == 1972
-        assert r.refs[0].locator == "1734"
-        assert r.refs[0].container_name == "J. Chem. Phys."
-        assert r.refs[0].extra['crossref'] == {"volume": "57", "author": "Swenson", "doi": "10.1063/1.1678462"}
-        assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry"
diff --git a/python/tests/grobid_metadata_importer_test.py b/python/tests/grobid_metadata_importer_test.py
deleted file mode 100644
index 502ca74a..00000000
--- a/python/tests/grobid_metadata_importer_test.py
+++ /dev/null
@@ -1,56 +0,0 @@
-
-import os
-import json
-import base64
-import pytest
-from fatcat_tools.grobid_metadata_importer import FatcatGrobidMetadataImporter
-
-"""
-WARNING: these tests are currently very fragile because they have database
-side-effects. Should probably be disabled or re-written.
-"""
-
-@pytest.fixture(scope="function")
-def grobid_metadata_importer():
-    yield FatcatGrobidMetadataImporter("http://localhost:9411/v0")
-
-# TODO: use API to check that entities actually created...
-#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
-#    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-#        grobid_metadata_importer.process_batch(f)
-
-def test_grobid_metadata_parse(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        raw = json.loads(f.readline().split('\t')[4])
-        re = grobid_metadata_importer.parse_grobid_json(raw)
-        assert re
-        assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
-        assert len(re.contribs) == 5
-        print(re.contribs)
-        assert re.contribs[0].raw_name == "Wahyu Ary"
-        assert re.publisher == None
-        assert re.extra.get('container_name') == None
-        assert len(re.refs) == 27
-
-def test_file_metadata_parse(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        f.readline()
-        raw = f.readline().split('\t')
-        # randomize sha1 so tests are repeatable
-        random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
-        fe = grobid_metadata_importer.parse_file_metadata(
-            random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
-        assert fe
-        #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
-        assert fe.md5 == None
-        assert fe.mimetype == "application/pdf"
-        assert fe.size == 142710
-        assert fe.urls[1].url.startswith("http://via.library.depaul.edu")
-        assert fe.urls[1].rel == "web"
-        assert fe.urls[0].url.startswith("https://web.archive.org/")
-        assert fe.urls[0].rel == "webarchive"
-        assert len(fe.releases) == 0
-
-def test_grobid_metadata_importer(grobid_metadata_importer):
-    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-        grobid_metadata_importer.process_source(f)
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
new file mode 100644
index 00000000..ab33d0fc
--- /dev/null
+++ b/python/tests/import_crossref.py
@@ -0,0 +1,57 @@
+
+import json
+import pytest
+from fatcat_tools.importers.crossref import FatcatCrossrefImporter
+
+
+@pytest.fixture(scope="function")
+def crossref_importer():
+    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
+        yield FatcatCrossrefImporter("http://localhost:9411/v0", issn_file, 'tests/files/example_map.sqlite3')
+
+def test_crossref_importer_batch(crossref_importer):
+    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+        crossref_importer.process_batch(f)
+
+def test_crossref_importer(crossref_importer):
+    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+        crossref_importer.process_source(f)
+
+def test_crossref_importer_create(crossref_importer):
+    crossref_importer.create_containers = True
+    with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+        crossref_importer.process_source(f)
+
+def test_crossref_dict_parse(crossref_importer):
+    with open('tests/files/crossref-works.single.json', 'r') as f:
+        # not a single line
+        raw = json.loads(f.read())
+        (r, c) = crossref_importer.parse_crossref_dict(raw)
+        extra = r.extra['crossref']
+        assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
+        assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t"
+        assert r.publisher == "Wiley-Blackwell"
+        print(extra)
+        assert extra['container-title'] == ["International Journal of Quantum Chemistry"]
+        assert r.release_type == "journal-article"
+        assert r.release_status == "published"
+        assert r.isbn13 == "978-3-16-148410-0"
+        assert 'subtitle' not in extra
+        assert 'archive' not in extra
+        assert 'funder' not in extra
+        assert len(r.contribs) == 5
+        assert r.contribs[0].raw_name == "Marcelo D. Radicioni"
+        assert r.contribs[0].index == 0
+        assert r.contribs[1].extra['affiliations'] == ["Some University"]
+        assert r.contribs[1].role == "author"
+        assert r.contribs[3].role == "editor"
+        assert r.contribs[3].index is None
+        assert r.contribs[4].role == "translator"
+        assert r.contribs[4].index is None
+        assert len(r.refs) == 25
+        assert r.refs[0].key == "BIB1"
+        assert r.refs[0].year == 1972
+        assert r.refs[0].locator == "1734"
+        assert r.refs[0].container_name == "J. Chem. Phys."
+        assert r.refs[0].extra['crossref'] == {"volume": "57", "author": "Swenson", "doi": "10.1063/1.1678462"}
+        assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry"
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
new file mode 100644
index 00000000..8b268e21
--- /dev/null
+++ b/python/tests/import_grobid_metadata.py
@@ -0,0 +1,56 @@
+
+import os
+import json
+import base64
+import pytest
+from fatcat_tools.importers.grobid_metadata import FatcatGrobidMetadataImporter
+
+"""
+WARNING: these tests are currently very fragile because they have database
+side-effects. Should probably be disabled or re-written.
+"""
+
+@pytest.fixture(scope="function")
+def grobid_metadata_importer():
+    yield FatcatGrobidMetadataImporter("http://localhost:9411/v0")
+
+# TODO: use API to check that entities actually created...
+#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
+#    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+#        grobid_metadata_importer.process_batch(f)
+
+def test_grobid_metadata_parse(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        raw = json.loads(f.readline().split('\t')[4])
+        re = grobid_metadata_importer.parse_grobid_json(raw)
+        assert re
+        assert re.title == "PEMBELAJARAN FISIKA DENGAN PENDEKATAN KETERAMPILAN PROSES MELALUI METODE DEMONSTRASI MENGGUNAKAN MEDIA RIIL DAN MEDIA VIRTUIL DITINJAU DARI MOTIVASI DAN GAYA BERFIKIR SISWA"
+        assert len(re.contribs) == 5
+        print(re.contribs)
+        assert re.contribs[0].raw_name == "Wahyu Ary"
+        assert re.publisher == None
+        assert re.extra.get('container_name') == None
+        assert len(re.refs) == 27
+
+def test_file_metadata_parse(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        f.readline()
+        raw = f.readline().split('\t')
+        # randomize sha1 so tests are repeatable
+        random_sha1 = "sha1:{}".format(base64.b32encode(os.urandom(20)).decode('utf-8').upper())
+        fe = grobid_metadata_importer.parse_file_metadata(
+            random_sha1, json.loads(raw[1]), raw[2], int(raw[3]))
+        assert fe
+        #assert fe.sha1 == "d4a841744719518bf8bdd5d91576ccedc55efbb5" # "sha1:2SUEC5CHDFIYX6F52XMRK5WM5XCV565V"
+        assert fe.md5 == None
+        assert fe.mimetype == "application/pdf"
+        assert fe.size == 142710
+        assert fe.urls[1].url.startswith("http://via.library.depaul.edu")
+        assert fe.urls[1].rel == "web"
+        assert fe.urls[0].url.startswith("https://web.archive.org/")
+        assert fe.urls[0].rel == "webarchive"
+        assert len(fe.releases) == 0
+
+def test_grobid_metadata_importer(grobid_metadata_importer):
+    with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+        grobid_metadata_importer.process_source(f)
diff --git a/python/tests/import_issn.py b/python/tests/import_issn.py
new file mode 100644
index 00000000..f45747ed
--- /dev/null
+++ b/python/tests/import_issn.py
@@ -0,0 +1,17 @@
+
+import pytest
+from fatcat_tools.importers.issn import FatcatIssnImporter
+
+
+@pytest.fixture(scope="function")
+def issn_importer():
+    yield FatcatIssnImporter("http://localhost:9411/v0")
+
+# TODO: use API to check that entities actually created...
+def test_issn_importer_batch(issn_importer):
+    with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
+        issn_importer.process_csv_batch(f)
+
+def test_issn_importer(issn_importer):
+    with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
+        issn_importer.process_csv_source(f)
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
new file mode 100644
index 00000000..8004e3bd
--- /dev/null
+++ b/python/tests/import_matched.py
@@ -0,0 +1,32 @@
+
+import json
+import pytest
+from fatcat_tools.importers.matched import FatcatMatchedImporter
+
+
+@pytest.fixture(scope="function")
+def matched_importer():
+    yield FatcatMatchedImporter("http://localhost:9411/v0")
+
+# TODO: use API to check that entities actually created...
+def test_matched_importer_batch(matched_importer):
+    with open('tests/files/example_matched.json', 'r') as f:
+        matched_importer.process_batch(f)
+
+def test_matched_importer(matched_importer):
+    with open('tests/files/example_matched.json', 'r') as f:
+        matched_importer.process_source(f)
+
+def test_matched_dict_parse(matched_importer):
+    with open('tests/files/example_matched.json', 'r') as f:
+        raw = json.loads(f.readline())
+        f = matched_importer.parse_matched_dict(raw)
+        assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
+        assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff"
+        assert f.mimetype == "application/pdf"
+        assert f.size == 255629
+        assert f.urls[1].url.startswith("http://journals.plos.org")
+        assert f.urls[1].rel == "web"
+        assert f.urls[0].url.startswith("https://web.archive.org/")
+        assert f.urls[0].rel == "webarchive"
+        assert len(f.releases) == 1
diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py
new file mode 100644
index 00000000..2dc98d76
--- /dev/null
+++ b/python/tests/import_orcid.py
@@ -0,0 +1,37 @@
+
+import json
+import pytest
+from fatcat_tools.importers.orcid import FatcatOrcidImporter
+
+
+@pytest.fixture(scope="function")
+def orcid_importer():
+    yield FatcatOrcidImporter("http://localhost:9411/v0")
+
+# TODO: use API to check that entities actually created...
+def test_orcid_importer_batch(orcid_importer):
+    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+        orcid_importer.process_batch(f)
+
+def test_orcid_importer_badid(orcid_importer):
+    with open('tests/files/0000-0001-8254-710X.json', 'r') as f:
+        orcid_importer.process_batch(f)
+
+def test_orcid_importer(orcid_importer):
+    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+        orcid_importer.process_source(f)
+
+def test_orcid_importer_x(orcid_importer):
+    with open('tests/files/0000-0003-3953-765X.json', 'r') as f:
+        orcid_importer.process_source(f)
+    c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X")
+    assert c is not None
+
+def test_orcid_dict_parse(orcid_importer):
+    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+        raw = json.loads(f.readline())
+        c = orcid_importer.parse_orcid_dict(raw)
+        assert c.given_name == "Man-Hui"
+        assert c.surname == "Li"
+        assert c.display_name == "Man-Hui Li"
+        assert c.orcid == "0000-0001-8254-7103"
diff --git a/python/tests/importer.py b/python/tests/importer.py
index 0de86635..d98638e4 100644
--- a/python/tests/importer.py
+++ b/python/tests/importer.py
@@ -1,7 +1,7 @@
 
 import pytest
 
-from fatcat_tools.importer_common import FatcatImporter
+from fatcat_tools.importers.common import FatcatImporter
 
 
 def test_issnl_mapping_lookup():
diff --git a/python/tests/issn.py b/python/tests/issn.py
deleted file mode 100644
index 76c8aecb..00000000
--- a/python/tests/issn.py
+++ /dev/null
@@ -1,17 +0,0 @@
-
-import pytest
-from fatcat_tools.issn_importer import FatcatIssnImporter
-
-
-@pytest.fixture(scope="function")
-def issn_importer():
-    yield FatcatIssnImporter("http://localhost:9411/v0")
-
-# TODO: use API to check that entities actually created...
-def test_issn_importer_batch(issn_importer):
-    with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
-        issn_importer.process_csv_batch(f)
-
-def test_issn_importer(issn_importer):
-    with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
-        issn_importer.process_csv_source(f)
diff --git a/python/tests/matched_importer_test.py b/python/tests/matched_importer_test.py
deleted file mode 100644
index 4042eabb..00000000
--- a/python/tests/matched_importer_test.py
+++ /dev/null
@@ -1,32 +0,0 @@
-
-import json
-import pytest
-from fatcat_tools.matched_importer import FatcatMatchedImporter
-
-
-@pytest.fixture(scope="function")
-def matched_importer():
-    yield FatcatMatchedImporter("http://localhost:9411/v0")
-
-# TODO: use API to check that entities actually created...
-def test_matched_importer_batch(matched_importer):
-    with open('tests/files/example_matched.json', 'r') as f:
-        matched_importer.process_batch(f)
-
-def test_matched_importer(matched_importer):
-    with open('tests/files/example_matched.json', 'r') as f:
-        matched_importer.process_source(f)
-
-def test_matched_dict_parse(matched_importer):
-    with open('tests/files/example_matched.json', 'r') as f:
-        raw = json.loads(f.readline())
-        f = matched_importer.parse_matched_dict(raw)
-        assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
-        assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff"
-        assert f.mimetype == "application/pdf"
-        assert f.size == 255629
-        assert f.urls[1].url.startswith("http://journals.plos.org")
-        assert f.urls[1].rel == "web"
-        assert f.urls[0].url.startswith("https://web.archive.org/")
-        assert f.urls[0].rel == "webarchive"
-        assert len(f.releases) == 1
diff --git a/python/tests/orcid.py b/python/tests/orcid.py
deleted file mode 100644
index f8228cc0..00000000
--- a/python/tests/orcid.py
+++ /dev/null
@@ -1,37 +0,0 @@
-
-import json
-import pytest
-from fatcat_tools.orcid_importer import FatcatOrcidImporter
-
-
-@pytest.fixture(scope="function")
-def orcid_importer():
-    yield FatcatOrcidImporter("http://localhost:9411/v0")
-
-# TODO: use API to check that entities actually created...
-def test_orcid_importer_batch(orcid_importer):
-    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
-        orcid_importer.process_batch(f)
-
-def test_orcid_importer_badid(orcid_importer):
-    with open('tests/files/0000-0001-8254-710X.json', 'r') as f:
-        orcid_importer.process_batch(f)
-
-def test_orcid_importer(orcid_importer):
-    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
-        orcid_importer.process_source(f)
-
-def test_orcid_importer_x(orcid_importer):
-    with open('tests/files/0000-0003-3953-765X.json', 'r') as f:
-        orcid_importer.process_source(f)
-    c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X")
-    assert c is not None
-
-def test_orcid_dict_parse(orcid_importer):
-    with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
-        raw = json.loads(f.readline())
-        c = orcid_importer.parse_orcid_dict(raw)
-        assert c.given_name == "Man-Hui"
-        assert c.surname == "Li"
-        assert c.display_name == "Man-Hui Li"
-        assert c.orcid == "0000-0001-8254-7103"
diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py
index 669c2526..52a9965a 100644
--- a/python/tests/transform_tests.py
+++ b/python/tests/transform_tests.py
@@ -1,10 +1,10 @@
 
 import json
 import pytest
-from fatcat_tools.crossref_importer import FatcatCrossrefImporter
+from fatcat_tools.importers.crossref import FatcatCrossrefImporter
 from fatcat_tools.transforms import *
 
-from crossref import crossref_importer
+from import_crossref import crossref_importer
 
 def test_elastic_convert(crossref_importer):
     with open('tests/files/crossref-works.single.json', 'r') as f:
--
cgit v1.2.3
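
Note (not part of the patch itself): the practical effect of this shuffle for downstream code is that the importer classes move from top-level fatcat_tools.*_importer modules into a fatcat_tools.importers package. A minimal Python sketch of the path change, using only the old and new import paths that appear in the diff above; the try/except fallback is purely illustrative for code that has to run against either layout:

    # Old vs. new import paths for the fatcat_tools importers.
    # The fallback pattern is an illustration, not something this patch adds.
    try:
        # layout after this commit
        from fatcat_tools.importers.crossref import FatcatCrossrefImporter
        from fatcat_tools.importers.common import FatcatImporter
    except ImportError:
        # layout before this commit
        from fatcat_tools.crossref_importer import FatcatCrossrefImporter
        from fatcat_tools.importer_common import FatcatImporter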