summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-23 15:02:03 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-23 15:02:03 -0800
commit: 1443f05faebd9e697086132694401f6a6c42d9b5 (patch)
tree: 8da8b8e7f4c957c5edccefe9188741c15697cd46 /python/tests
parent: 1fa8f820fd3b7c64d424f55796d2b860d22e4b22 (diff)
download: fatcat-1443f05faebd9e697086132694401f6a6c42d9b5.tar.gz
download: fatcat-1443f05faebd9e697086132694401f6a6c42d9b5.zip
more tests; fix some importer behavior
Diffstat (limited to 'python/tests')
-rw-r--r--  python/tests/import_crossref.py          17
-rw-r--r--  python/tests/import_grobid_metadata.py   16
-rw-r--r--  python/tests/import_journal_metadata.py  13
-rw-r--r--  python/tests/import_matched.py           16
-rw-r--r--  python/tests/import_orcid.py             19
5 files changed, 68 insertions, 13 deletions
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index 8eeb8072..45123540 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -23,7 +23,11 @@ def test_crossref_importer(crossref_importer):
last_index = crossref_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
crossref_importer.bezerk_mode = True
- JsonLinePusher(crossref_importer, f).run()
+ counts = JsonLinePusher(crossref_importer, f).run()
+ assert counts['insert'] == 14
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
+
# fetch most recent editgroup
change = crossref_importer.api.get_changelog_entry(index=last_index+1)
eg = change.editgroup
@@ -32,6 +36,14 @@ def test_crossref_importer(crossref_importer):
assert eg.extra['git_rev']
assert "fatcat_tools.CrossrefImporter" in eg.extra['agent']
+ with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+ crossref_importer.bezerk_mode = False
+ crossref_importer.reset()
+ counts = JsonLinePusher(crossref_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 14
+ assert counts['skip'] == 0
+
def test_crossref_mappings(crossref_importer):
assert crossref_importer.map_release_type('journal-article') == "article-journal"
assert crossref_importer.map_release_type('asdf') is None
@@ -41,8 +53,7 @@ def test_crossref_mappings(crossref_importer):
def test_crossref_importer_create(crossref_importer):
crossref_importer.create_containers = True
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- pusher = JsonLinePusher(crossref_importer, f)
- pusher.run()
+ JsonLinePusher(crossref_importer, f).run()
def test_crossref_dict_parse(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py
index 698b36be..feb604ce 100644
--- a/python/tests/import_grobid_metadata.py
+++ b/python/tests/import_grobid_metadata.py
@@ -48,12 +48,15 @@ def test_file_metadata_parse(grobid_metadata_importer):
assert fe.urls[0].rel == "webarchive"
assert len(fe.release_ids) == 0
-# TODO: use API to check that entities actually created...
def test_grobid_metadata_importer(grobid_metadata_importer):
last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
grobid_metadata_importer.bezerk_mode = True
- LinePusher(grobid_metadata_importer, f).run()
+ counts = LinePusher(grobid_metadata_importer, f).run()
+ assert counts['insert'] == 10
+ assert counts['inserted.release'] == 10
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
# fetch most recent editgroup
change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1)
@@ -62,3 +65,12 @@ def test_grobid_metadata_importer(grobid_metadata_importer):
assert "grobid" in eg.description.lower()
assert eg.extra['git_rev']
assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent']
+
+ with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f:
+ grobid_metadata_importer.reset()
+ grobid_metadata_importer.bezerk_mode = False
+ counts = LinePusher(grobid_metadata_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['inserted.release'] == 0
+ assert counts['exists'] == 10
+ assert counts['skip'] == 0
diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py
index 1663da05..a2b10a65 100644
--- a/python/tests/import_journal_metadata.py
+++ b/python/tests/import_journal_metadata.py
@@ -17,7 +17,10 @@ def test_journal_metadata_importer(journal_metadata_importer):
last_index = journal_metadata_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
journal_metadata_importer.bezerk_mode = True
- CsvPusher(journal_metadata_importer, f).run()
+ counts = CsvPusher(journal_metadata_importer, f).run()
+ assert counts['insert'] == 9
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
# fetch most recent editgroup
change = journal_metadata_importer.api.get_changelog_entry(index=last_index+1)
@@ -26,3 +29,11 @@ def test_journal_metadata_importer(journal_metadata_importer):
assert "container" in eg.description.lower()
assert eg.extra['git_rev']
assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent']
+
+ with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f:
+ journal_metadata_importer.reset()
+ journal_metadata_importer.bezerk_mode = False
+ counts = CsvPusher(journal_metadata_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 9
+ assert counts['skip'] == 0
diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py
index 22bc45ad..8f694456 100644
--- a/python/tests/import_matched.py
+++ b/python/tests/import_matched.py
@@ -10,7 +10,7 @@ def matched_importer(api):
yield MatchedImporter(api)
# TODO: use API to check that entities actually created...
-def test_matched_importer_batch(matched_importer):
+def test_matched_importer(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
JsonLinePusher(matched_importer, f).run()
@@ -18,7 +18,10 @@ def test_matched_importer(matched_importer):
last_index = matched_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/example_matched.json', 'r') as f:
matched_importer.bezerk_mode = True
- JsonLinePusher(matched_importer, f).run()
+ counts = JsonLinePusher(matched_importer, f).run()
+ assert counts['insert'] == 2
+ assert counts['exists'] == 0
+ assert counts['skip'] == 11
# fetch most recent editgroup
change = matched_importer.api.get_changelog_entry(index=last_index+1)
@@ -28,6 +31,15 @@ def test_matched_importer(matched_importer):
assert eg.extra['git_rev']
assert "fatcat_tools.MatchedImporter" in eg.extra['agent']
+ # re-insert; should skip
+ with open('tests/files/example_matched.json', 'r') as f:
+ matched_importer.reset()
+ matched_importer.bezerk_mode = False
+ counts = JsonLinePusher(matched_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 2
+ assert counts['skip'] == 11
+
def test_matched_dict_parse(matched_importer):
with open('tests/files/example_matched.json', 'r') as f:
raw = json.loads(f.readline())
diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py
index 4055091d..57886b52 100644
--- a/python/tests/import_orcid.py
+++ b/python/tests/import_orcid.py
@@ -11,15 +11,17 @@ def orcid_importer(api):
def test_orcid_importer_badid(orcid_importer):
with open('tests/files/0000-0001-8254-710X.json', 'r') as f:
- pusher = JsonLinePusher(orcid_importer, f)
- pusher.run()
+ JsonLinePusher(orcid_importer, f).run()
# TODO: use API to check that entities actually created...
def test_orcid_importer(orcid_importer):
last_index = orcid_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
orcid_importer.bezerk_mode = True
- JsonLinePusher(orcid_importer, f).run()
+ counts = JsonLinePusher(orcid_importer, f).run()
+ assert counts['insert'] == 1
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
# fetch most recent editgroup
change = orcid_importer.api.get_changelog_entry(index=last_index+1)
@@ -29,10 +31,17 @@ def test_orcid_importer(orcid_importer):
assert eg.extra['git_rev']
assert "fatcat_tools.OrcidImporter" in eg.extra['agent']
+ with open('tests/files/0000-0001-8254-7103.json', 'r') as f:
+ orcid_importer.reset()
+ orcid_importer.bezerk_mode = False
+ counts = JsonLinePusher(orcid_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 1
+ assert counts['skip'] == 0
+
def test_orcid_importer_x(orcid_importer):
with open('tests/files/0000-0003-3953-765X.json', 'r') as f:
- pusher = JsonLinePusher(orcid_importer, f)
- pusher.run()
+ JsonLinePusher(orcid_importer, f).run()
c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X")
assert c is not None