diff options
Diffstat (limited to 'python/tests/import_crossref.py')
-rw-r--r-- | python/tests/import_crossref.py | 47 |
1 files changed, 33 insertions, 14 deletions
diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index e2ca6122..193f78f6 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -1,35 +1,51 @@ import json import pytest -from fatcat_tools.importers import CrossrefImporter +from fatcat_tools.importers import CrossrefImporter, JsonLinePusher from fixtures import api @pytest.fixture(scope="function") def crossref_importer(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=False) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=True) @pytest.fixture(scope="function") def crossref_importer_existing(api): with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file: - yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', check_existing=True) + yield CrossrefImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3', bezerk_mode=False) def test_crossref_importer_batch(crossref_importer): with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_batch(f) + JsonLinePusher(crossref_importer, f).run() def test_crossref_importer(crossref_importer): + last_index = crossref_importer.api.get_changelog(limit=1)[0].index with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_source(f) + crossref_importer.bezerk_mode = True + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 14 + assert counts['exists'] == 0 + assert counts['skip'] == 0 + # fetch most recent editgroup - changes = crossref_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = crossref_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "crossref" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.CrossrefImporter" in eg.extra['agent'] + last_index = crossref_importer.api.get_changelog(limit=1)[0].index + with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + crossref_importer.bezerk_mode = False + crossref_importer.reset() + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 14 + assert counts['skip'] == 0 + assert last_index == crossref_importer.api.get_changelog(limit=1)[0].index + def test_crossref_mappings(crossref_importer): assert crossref_importer.map_release_type('journal-article') == "article-journal" assert crossref_importer.map_release_type('asdf') is None @@ -39,13 +55,13 @@ def test_crossref_mappings(crossref_importer): def test_crossref_importer_create(crossref_importer): crossref_importer.create_containers = True with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - crossref_importer.process_source(f) + JsonLinePusher(crossref_importer, f).run() def test_crossref_dict_parse(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line raw = json.loads(f.read()) - (r, c) = crossref_importer.parse_crossref_dict(raw) + r = crossref_importer.parse_record(raw) extra = r.extra['crossref'] assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t" @@ -61,7 +77,8 @@ def test_crossref_dict_parse(crossref_importer): assert len(r.contribs) == 5 assert r.contribs[0].raw_name == "Marcelo D. Radicioni" assert r.contribs[0].index == 0 - assert r.contribs[1].extra['affiliations'] == ["Some University"] + assert r.contribs[1].raw_affiliation == "Some University" + assert r.contribs[1].extra['more_affiliations'] == ["Some Department"] assert r.contribs[1].role == "author" assert r.contribs[3].role == "editor" assert r.contribs[3].index is None @@ -78,8 +95,10 @@ def test_crossref_dict_parse(crossref_importer): def test_stateful_checking(crossref_importer_existing): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line, a whole document - raw = json.loads(f.read()) + raw = f.read() # might not exist yet... - crossref_importer_existing.process_source([json.dumps(raw)]) - # ok, make sure we get 'None' back - assert crossref_importer_existing.parse_crossref_dict(raw) is None + crossref_importer_existing.push_record(json.loads(raw)) + crossref_importer_existing.finish() + # make sure we wouldn't insert again + entity = crossref_importer_existing.parse_record(json.loads(raw)) + assert crossref_importer_existing.try_update(entity) is False |