From 1443f05faebd9e697086132694401f6a6c42d9b5 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 23 Jan 2019 15:02:03 -0800 Subject: more tests; fix some importer behavior --- python/tests/import_crossref.py | 17 ++++++++++++++--- python/tests/import_grobid_metadata.py | 16 ++++++++++++++-- python/tests/import_journal_metadata.py | 13 ++++++++++++- python/tests/import_matched.py | 16 ++++++++++++++-- python/tests/import_orcid.py | 19 ++++++++++++++----- 5 files changed, 68 insertions(+), 13 deletions(-) (limited to 'python/tests') diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index 8eeb8072..45123540 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -23,7 +23,11 @@ def test_crossref_importer(crossref_importer): last_index = crossref_importer.api.get_changelog(limit=1)[0].index with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: crossref_importer.bezerk_mode = True - JsonLinePusher(crossref_importer, f).run() + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 14 + assert counts['exists'] == 0 + assert counts['skip'] == 0 + # fetch most recent editgroup change = crossref_importer.api.get_changelog_entry(index=last_index+1) eg = change.editgroup @@ -32,6 +36,14 @@ def test_crossref_importer(crossref_importer): assert eg.extra['git_rev'] assert "fatcat_tools.CrossrefImporter" in eg.extra['agent'] + with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: + crossref_importer.bezerk_mode = False + crossref_importer.reset() + counts = JsonLinePusher(crossref_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 14 + assert counts['skip'] == 0 + def test_crossref_mappings(crossref_importer): assert crossref_importer.map_release_type('journal-article') == "article-journal" assert crossref_importer.map_release_type('asdf') is None @@ -41,8 +53,7 @@ def test_crossref_mappings(crossref_importer): def test_crossref_importer_create(crossref_importer): crossref_importer.create_containers = True with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: - pusher = JsonLinePusher(crossref_importer, f) - pusher.run() + JsonLinePusher(crossref_importer, f).run() def test_crossref_dict_parse(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 698b36be..feb604ce 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -48,12 +48,15 @@ def test_file_metadata_parse(grobid_metadata_importer): assert fe.urls[0].rel == "webarchive" assert len(fe.release_ids) == 0 -# TODO: use API to check that entities actually created... def test_grobid_metadata_importer(grobid_metadata_importer): last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: grobid_metadata_importer.bezerk_mode = True - LinePusher(grobid_metadata_importer, f).run() + counts = LinePusher(grobid_metadata_importer, f).run() + assert counts['insert'] == 10 + assert counts['inserted.release'] == 10 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1) @@ -62,3 +65,12 @@ def test_grobid_metadata_importer(grobid_metadata_importer): assert "grobid" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent'] + + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + grobid_metadata_importer.reset() + grobid_metadata_importer.bezerk_mode = False + counts = LinePusher(grobid_metadata_importer, f).run() + assert counts['insert'] == 0 + assert counts['inserted.release'] == 0 + assert counts['exists'] == 10 + assert counts['skip'] == 0 diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py index 1663da05..a2b10a65 100644 --- a/python/tests/import_journal_metadata.py +++ b/python/tests/import_journal_metadata.py @@ -17,7 +17,10 @@ def test_journal_metadata_importer(journal_metadata_importer): last_index = journal_metadata_importer.api.get_changelog(limit=1)[0].index with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: journal_metadata_importer.bezerk_mode = True - CsvPusher(journal_metadata_importer, f).run() + counts = CsvPusher(journal_metadata_importer, f).run() + assert counts['insert'] == 9 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup change = journal_metadata_importer.api.get_changelog_entry(index=last_index+1) @@ -26,3 +29,11 @@ def test_journal_metadata_importer(journal_metadata_importer): assert "container" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.JournalMetadataImporter" in eg.extra['agent'] + + with open('tests/files/journal_extra_metadata.snip.csv', 'r') as f: + journal_metadata_importer.reset() + journal_metadata_importer.bezerk_mode = False + counts = CsvPusher(journal_metadata_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 9 + assert counts['skip'] == 0 diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index 22bc45ad..8f694456 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -10,7 +10,7 @@ def matched_importer(api): yield MatchedImporter(api) # TODO: use API to check that entities actually created... -def test_matched_importer_batch(matched_importer): +def test_matched_importer(matched_importer): with open('tests/files/example_matched.json', 'r') as f: JsonLinePusher(matched_importer, f).run() @@ -18,7 +18,10 @@ def test_matched_importer(matched_importer): last_index = matched_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_matched.json', 'r') as f: matched_importer.bezerk_mode = True - JsonLinePusher(matched_importer, f).run() + counts = JsonLinePusher(matched_importer, f).run() + assert counts['insert'] == 2 + assert counts['exists'] == 0 + assert counts['skip'] == 11 # fetch most recent editgroup change = matched_importer.api.get_changelog_entry(index=last_index+1) @@ -28,6 +31,15 @@ def test_matched_importer(matched_importer): assert eg.extra['git_rev'] assert "fatcat_tools.MatchedImporter" in eg.extra['agent'] + # re-insert; should skip + with open('tests/files/example_matched.json', 'r') as f: + matched_importer.reset() + matched_importer.bezerk_mode = False + counts = JsonLinePusher(matched_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 2 + assert counts['skip'] == 11 + def test_matched_dict_parse(matched_importer): with open('tests/files/example_matched.json', 'r') as f: raw = json.loads(f.readline()) diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 4055091d..57886b52 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -11,15 +11,17 @@ def orcid_importer(api): def test_orcid_importer_badid(orcid_importer): with open('tests/files/0000-0001-8254-710X.json', 'r') as f: - pusher = JsonLinePusher(orcid_importer, f) - pusher.run() + JsonLinePusher(orcid_importer, f).run() # TODO: use API to check that entities actually created... def test_orcid_importer(orcid_importer): last_index = orcid_importer.api.get_changelog(limit=1)[0].index with open('tests/files/0000-0001-8254-7103.json', 'r') as f: orcid_importer.bezerk_mode = True - JsonLinePusher(orcid_importer, f).run() + counts = JsonLinePusher(orcid_importer, f).run() + assert counts['insert'] == 1 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup change = orcid_importer.api.get_changelog_entry(index=last_index+1) @@ -29,10 +31,17 @@ def test_orcid_importer(orcid_importer): assert eg.extra['git_rev'] assert "fatcat_tools.OrcidImporter" in eg.extra['agent'] + with open('tests/files/0000-0001-8254-7103.json', 'r') as f: + orcid_importer.reset() + orcid_importer.bezerk_mode = False + counts = JsonLinePusher(orcid_importer, f).run() + assert counts['insert'] == 0 + assert counts['exists'] == 1 + assert counts['skip'] == 0 + def test_orcid_importer_x(orcid_importer): with open('tests/files/0000-0003-3953-765X.json', 'r') as f: - pusher = JsonLinePusher(orcid_importer, f) - pusher.run() + JsonLinePusher(orcid_importer, f).run() c = orcid_importer.api.lookup_creator(orcid="0000-0003-3953-765X") assert c is not None -- cgit v1.2.3