From e0f70bbbcbcb6232cfb508ad5c0ae637391c4871 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 22 Jan 2019 22:04:39 -0800 Subject: refactor remaining importers --- python/tests/import_crossref.py | 7 +++---- python/tests/import_grobid_metadata.py | 10 ++++------ python/tests/import_journal_metadata.py | 8 +++++--- python/tests/import_matched.py | 9 +++++---- python/tests/import_orcid.py | 19 +++++++++---------- python/tests/transform_tests.py | 2 +- 6 files changed, 27 insertions(+), 28 deletions(-) (limited to 'python/tests') diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index 5e0a150f..db49bb7f 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -17,13 +17,12 @@ def crossref_importer_existing(api): def test_crossref_importer_batch(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - pusher = JsonLinePusher(crossref_importer, f) - pusher.run() + JsonLinePusher(crossref_importer, f).run() def test_crossref_importer(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - pusher = JsonLinePusher(crossref_importer, f) - pusher.run() + crossref_importer.bezerk_mode = True + JsonLinePusher(crossref_importer, f).run() # fetch most recent editgroup changes = crossref_importer.api.get_changelog(limit=1) eg = changes[0].editgroup diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 97ebcaef..f00479d8 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -3,7 +3,7 @@ import os import json import base64 import pytest -from fatcat_tools.importers import GrobidMetadataImporter +from fatcat_tools.importers import GrobidMetadataImporter, LinePusher from fixtures import api """ @@ -15,10 +15,6 @@ side-effects. Should probably be disabled or re-written. def grobid_metadata_importer(api): yield GrobidMetadataImporter(api) -# TODO: use API to check that entities actually created... -#def test_grobid_metadata_importer_batch(grobid_metadata_importer): -# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: -# grobid_metadata_importer.process_batch(f) def test_grobid_metadata_parse(grobid_metadata_importer): with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: @@ -52,9 +48,11 @@ def test_file_metadata_parse(grobid_metadata_importer): assert fe.urls[0].rel == "webarchive" assert len(fe.release_ids) == 0 +# TODO: use API to check that entities actually created... def test_grobid_metadata_importer(grobid_metadata_importer): with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - grobid_metadata_importer.process_source(f) + grobid_metadata_importer.bezerk_mode = True + LinePusher(grobid_metadata_importer, f).run() # fetch most recent editgroup changes = grobid_metadata_importer.api.get_changelog(limit=1) diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py index 81334bc6..0263f706 100644 --- a/python/tests/import_journal_metadata.py +++ b/python/tests/import_journal_metadata.py @@ -1,6 +1,6 @@ import pytest -from fatcat_tools.importers import JournalMetadataImporter +from fatcat_tools.importers import JournalMetadataImporter, CsvPusher from fixtures import api @@ -11,11 +11,13 @@ def journal_metadata_importer(api): # TODO: use API to check that entities actually created... def test_journal_metadata_importer_batch(journal_metadata_importer): with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: - journal_metadata_importer.process_csv_batch(f) + CsvPusher(journal_metadata_importer, f).run() def test_journal_metadata_importer(journal_metadata_importer): with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: - journal_metadata_importer.process_csv_source(f) + journal_metadata_importer.bezerk_mode = True + journal_metadata_importer.serial_mode = True + CsvPusher(journal_metadata_importer, f).run() # fetch most recent editgroup changes = journal_metadata_importer.api.get_changelog(limit=1) diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index 080674ac..a58c402f 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.importers import MatchedImporter +from fatcat_tools.importers import MatchedImporter, JsonLinePusher from fixtures import api @@ -12,11 +12,12 @@ def matched_importer(api): # TODO: use API to check that entities actually created... def test_matched_importer_batch(matched_importer): with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_batch(f) + JsonLinePusher(matched_importer, f).run() def test_matched_importer(matched_importer): with open('tests/files/example_matched.json', 'r') as f: - matched_importer.process_source(f) + matched_importer.bezerk_mode = True + JsonLinePusher(matched_importer, f).run() # fetch most recent editgroup changes = matched_importer.api.get_changelog(limit=1) @@ -29,7 +30,7 @@ def test_matched_importer(matched_importer): def test_matched_dict_parse(matched_importer): with open('tests/files/example_matched.json', 'r') as f: raw = json.loads(f.readline()) - f = matched_importer.parse_matched_dict(raw) + f = matched_importer.parse_record(raw) assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313" assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff" assert f.mimetype == "application/pdf" diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 717a1328..9e898521 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.importers import OrcidImporter +from fatcat_tools.importers import OrcidImporter, JsonLinePusher from fixtures import api @@ -9,18 +9,16 @@ from fixtures import api def orcid_importer(api): yield OrcidImporter(api) -# TODO: use API to check that entities actually created... -def test_orcid_importer_batch(orcid_importer): - with open('tests/files/0000-0001-8254-7103.json', 'r') as f: - orcid_importer.process_batch(f) - def test_orcid_importer_badid(orcid_importer): with open('tests/files/0000-0001-8254-710X.json', 'r') as f: - orcid_importer.process_batch(f) + pusher = JsonLinePusher(orcid_importer, f) + pusher.run() +# TODO: use API to check that entities actually created... def test_orcid_importer(orcid_importer): with open('tests/files/0000-0001-8254-7103.json', 'r') as f: - orcid_importer.process_source(f) + orcid_importer.bezerk_mode = True + JsonLinePusher(orcid_importer, f).run() # fetch most recent editgroup changes = orcid_importer.api.get_changelog(limit=1) @@ -32,14 +30,15 @@ def test_orcid_importer(orcid_importer): def test_orcid_importer_x(orcid_importer): with open('tests/files/0000-0003-3953-765X.json', 'r') as f: - orcid_importer.process_source(f) + pusher = JsonLinePusher(orcid_importer, f) + pusher.run() c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X") assert c is not None def test_orcid_dict_parse(orcid_importer): with open('tests/files/0000-0001-8254-7103.json', 'r') as f: raw = json.loads(f.readline()) - c = orcid_importer.parse_orcid_dict(raw) + c = orcid_importer.parse_record(raw) assert c.given_name == "Man-Hui" assert c.surname == "Li" assert c.display_name == "Man-Hui Li" diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py index e9d23250..6d6c6c82 100644 --- a/python/tests/transform_tests.py +++ b/python/tests/transform_tests.py @@ -11,7 +11,7 @@ def test_elasticsearch_convert(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) + r = crossref_importer.parse_record(raw) r.state = 'active' release_to_elasticsearch(r) -- cgit v1.2.3