summary refs log tree commit diff stats
path: root/python/tests
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-22 22:04:39 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-22 22:04:39 -0800
commit: e0f70bbbcbcb6232cfb508ad5c0ae637391c4871 (patch)
tree: 7d7c83a04a3776754476b123d70e23dfa6cf297d /python/tests
parent: 09475b87821142c5cd36c6b90fb97deb2a058312 (diff)
download: fatcat-e0f70bbbcbcb6232cfb508ad5c0ae637391c4871.tar.gz
download: fatcat-e0f70bbbcbcb6232cfb508ad5c0ae637391c4871.zip
refactor remaining importers
Diffstat (limited to 'python/tests')
-rw-r--r--  python/tests/import_crossref.py          | 7
-rw-r--r--  python/tests/import_grobid_metadata.py   | 10
-rw-r--r--  python/tests/import_journal_metadata.py  | 8
-rw-r--r--  python/tests/import_matched.py           | 9
-rw-r--r--  python/tests/import_orcid.py             | 19
-rw-r--r--  python/tests/transform_tests.py          | 2
6 files changed, 27 insertions, 28 deletions
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index 5e0a150f..db49bb7f 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -17,13 +17,12 @@ def crossref_importer_existing(api):
def test_crossref_importer_batch(crossref_importer):
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- pusher = JsonLinePusher(crossref_importer, f)
- pusher.run()
+ JsonLinePusher(crossref_importer, f).run()
def test_crossref_importer(crossref_importer):
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- pusher = JsonLinePusher(crossref_importer, f)
- pusher.run()
+ crossref_importer.bezerk_mode = True
+ JsonLinePusher(crossref_importer, f).run()
# fetch most recent editgroup
changes = crossref_importer.api.get_changelog(limit=1)
eg = changes[0].editgroup
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
index 97ebcaef..f00479d8 100644
--- a/python/tests/import_grobid_metadata.py
+++ b/python/tests/import_grobid_metadata.py
@@ -3,7 +3,7 @@ import os
import json
import base64
import pytest
-from fatcat_tools.importers import GrobidMetadataImporter
+from fatcat_tools.importers import GrobidMetadataImporter, LinePusher
from fixtures import api
"""
@@ -15,10 +15,6 @@ side-effects. Should probably be disabled or re-written.
def grobid_metadata_importer(api):
yield GrobidMetadataImporter(api)
-# TODO: use API to check that entities actually created...
-#def test_grobid_metadata_importer_batch(grobid_metadata_importer):
-# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
-# grobid_metadata_importer.process_batch(f)
def test_grobid_metadata_parse(grobid_metadata_importer):
with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
@@ -52,9 +48,11 @@ def test_file_metadata_parse(grobid_metadata_importer):
assert fe.urls[0].rel == "webarchive"
assert len(fe.release_ids) == 0
+# TODO: use API to check that entities actually created...
def test_grobid_metadata_importer(grobid_metadata_importer):
with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
- grobid_metadata_importer.process_source(f)
+ grobid_metadata_importer.bezerk_mode = True
+ LinePusher(grobid_metadata_importer, f).run()
# fetch most recent editgroup
changes = grobid_metadata_importer.api.get_changelog(limit=1)
diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py
index 81334bc6..0263f706 100644
--- a/python/tests/import_journal_metadata.py
+++ b/python/tests/import_journal_metadata.py
@@ -1,6 +1,6 @@
import pytest
-from fatcat_tools.importers import JournalMetadataImporter
+from fatcat_tools.importers import JournalMetadataImporter, CsvPusher
from fixtures import api
@@ -11,11 +11,13 @@ def journal_metadata_importer(api):
# TODO: use API to check that entities actually created...
def test_journal_metadata_importer_batch(journal_metadata_importer):
with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
- journal_metadata_importer.process_csv_batch(f)
+ CsvPusher(journal_metadata_importer, f).run()
def test_journal_metadata_importer(journal_metadata_importer):
with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
- journal_metadata_importer.process_csv_source(f)
+ journal_metadata_importer.bezerk_mode = True
+ journal_metadata_importer.serial_mode = True
+ CsvPusher(journal_metadata_importer, f).run()
# fetch most recent editgroup
changes = journal_metadata_importer.api.get_changelog(limit=1)
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 080674ac..a58c402f 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -1,7 +1,7 @@
import json
import pytest
-from fatcat_tools.importers import MatchedImporter
+from fatcat_tools.importers import MatchedImporter, JsonLinePusher
from fixtures import api
@@ -12,11 +12,12 @@ def matched_importer(api):
# TODO: use API to check that entities actually created...
def test_matched_importer_batch(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
- matched_importer.process_batch(f)
+ JsonLinePusher(matched_importer, f).run()
def test_matched_importer(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
- matched_importer.process_source(f)
+ matched_importer.bezerk_mode = True
+ JsonLinePusher(matched_importer, f).run()
# fetch most recent editgroup
changes = matched_importer.api.get_changelog(limit=1)
@@ -29,7 +30,7 @@ def test_matched_importer(matched_importer):
def test_matched_dict_parse(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
raw = json.loads(f.readline())
- f = matched_importer.parse_matched_dict(raw)
+ f = matched_importer.parse_record(raw)
assert f.sha1 == "00242a192acc258bdfdb151943419437f440c313"
assert f.md5 == "f4de91152c7ab9fdc2a128f962faebff"
assert f.mimetype == "application/pdf"
diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py
index 717a1328..9e898521 100644
--- a/python/tests/import_orcid.py
+++ b/python/tests/import_orcid.py
@@ -1,7 +1,7 @@
import json
import pytest
-from fatcat_tools.importers import OrcidImporter
+from fatcat_tools.importers import OrcidImporter, JsonLinePusher
from fixtures import api
@@ -9,18 +9,16 @@ from fixtures import api
def orcid_importer(api):
yield OrcidImporter(api)
-# TODO: use API to check that entities actually created...
-def test_orcid_importer_batch(orcid_importer):
- with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
- orcid_importer.process_batch(f)
-
def test_orcid_importer_badid(orcid_importer):
with open('tests/files/0000-0001-8254-710X.json', 'r') as f:
- orcid_importer.process_batch(f)
+ pusher = JsonLinePusher(orcid_importer, f)
+ pusher.run()
+# TODO: use API to check that entities actually created...
def test_orcid_importer(orcid_importer):
with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
- orcid_importer.process_source(f)
+ orcid_importer.bezerk_mode = True
+ JsonLinePusher(orcid_importer, f).run()
# fetch most recent editgroup
changes = orcid_importer.api.get_changelog(limit=1)
@@ -32,14 +30,15 @@ def test_orcid_importer(orcid_importer):
def test_orcid_importer_x(orcid_importer):
with open('tests/files/0000-0003-3953-765X.json', 'r') as f:
- orcid_importer.process_source(f)
+ pusher = JsonLinePusher(orcid_importer, f)
+ pusher.run()
c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X")
assert c is not None
def test_orcid_dict_parse(orcid_importer):
with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
raw = json.loads(f.readline())
- c = orcid_importer.parse_orcid_dict(raw)
+ c = orcid_importer.parse_record(raw)
assert c.given_name == "Man-Hui"
assert c.surname == "Li"
assert c.display_name == "Man-Hui Li"
diff --git a/python/tests/transform_tests.py b/python/tests/transform_tests.py
index e9d23250..6d6c6c82 100644
--- a/python/tests/transform_tests.py
+++ b/python/tests/transform_tests.py
@@ -11,7 +11,7 @@ def test_elasticsearch_convert(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
# not a single line
raw = json.loads(f.read())
- (r, c) = crossref_importer.parse_crossref_dict(raw)
+ r = crossref_importer.parse_record(raw)
r.state = 'active'
release_to_elasticsearch(r)