aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/import_crossref.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/tests/import_crossref.py')
-rw-r--r--python/tests/import_crossref.py47
1 files changed, 33 insertions, 14 deletions
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py
index e2ca6122..193f78f6 100644
--- a/python/tests/import_crossref.py
+++ b/python/tests/import_crossref.py
@@ -1,35 +1,51 @@
import json
import pytest
-from fatcat_tools.importers import CrossrefImporter
+from fatcat_tools.importers import CrossrefImporter, JsonLinePusher
from fixtures import api
@pytest.fixture(scope="function")
def crossref_importer(api):
with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False)
+ yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True)
@pytest.fixture(scope="function")
def crossref_importer_existing(api):
with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
- yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True)
+ yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False)
def test_crossref_importer_batch(crossref_importer):
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- crossref_importer.process_batch(f)
+ JsonLinePusher(crossref_importer, f).run()
def test_crossref_importer(crossref_importer):
+ last_index = crossref_importer.api.get_changelog(limit=1)[0].index
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- crossref_importer.process_source(f)
+ crossref_importer.bezerk_mode = True
+ counts = JsonLinePusher(crossref_importer, f).run()
+ assert counts['insert'] == 14
+ assert counts['exists'] == 0
+ assert counts['skip'] == 0
+
# fetch most recent editgroup
- changes = crossref_importer.api.get_changelog(limit=1)
- eg = changes[0].editgroup
+ change = crossref_importer.api.get_changelog_entry(index=last_index+1)
+ eg = change.editgroup
assert eg.description
assert "crossref" in eg.description.lower()
assert eg.extra['git_rev']
assert "fatcat_tools.CrossrefImporter" in eg.extra['agent']
+ last_index = crossref_importer.api.get_changelog(limit=1)[0].index
+ with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
+ crossref_importer.bezerk_mode = False
+ crossref_importer.reset()
+ counts = JsonLinePusher(crossref_importer, f).run()
+ assert counts['insert'] == 0
+ assert counts['exists'] == 14
+ assert counts['skip'] == 0
+ assert last_index == crossref_importer.api.get_changelog(limit=1)[0].index
+
def test_crossref_mappings(crossref_importer):
assert crossref_importer.map_release_type('journal-article') == "article-journal"
assert crossref_importer.map_release_type('asdf') is None
@@ -39,13 +55,13 @@ def test_crossref_mappings(crossref_importer):
def test_crossref_importer_create(crossref_importer):
crossref_importer.create_containers = True
with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:
- crossref_importer.process_source(f)
+ JsonLinePusher(crossref_importer, f).run()
def test_crossref_dict_parse(crossref_importer):
with open('tests/files/crossref-works.single.json', 'r') as f:
# not a single line
raw = json.loads(f.read())
- (r, c) = crossref_importer.parse_crossref_dict(raw)
+ r = crossref_importer.parse_record(raw)
extra = r.extra['crossref']
assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"
assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t"
@@ -61,7 +77,8 @@ def test_crossref_dict_parse(crossref_importer):
assert len(r.contribs) == 5
assert r.contribs[0].raw_name == "Marcelo D. Radicioni"
assert r.contribs[0].index == 0
- assert r.contribs[1].extra['affiliations'] == ["Some University"]
+ assert r.contribs[1].raw_affiliation == "Some University"
+ assert r.contribs[1].extra['more_affiliations'] == ["Some Department"]
assert r.contribs[1].role == "author"
assert r.contribs[3].role == "editor"
assert r.contribs[3].index is None
@@ -78,8 +95,10 @@ def test_crossref_dict_parse(crossref_importer):
def test_stateful_checking(crossref_importer_existing):
with open('tests/files/crossref-works.single.json', 'r') as f:
# not a single line, a whole document
- raw = json.loads(f.read())
+ raw = f.read()
# might not exist yet...
- crossref_importer_existing.process_source([json.dumps(raw)])
- # ok, make sure we get 'None' back
- assert crossref_importer_existing.parse_crossref_dict(raw) is None
+ crossref_importer_existing.push_record(json.loads(raw))
+ crossref_importer_existing.finish()
+ # make sure we wouldn't insert again
+ entity = crossref_importer_existing.parse_record(json.loads(raw))
+ assert crossref_importer_existing.try_update(entity) is False