diff options
Diffstat (limited to 'python/tests/import_grobid_metadata.py')
-rw-r--r-- | python/tests/import_grobid_metadata.py | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 97ebcaef..4fed4aaa 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -3,7 +3,7 @@ import os import json import base64 import pytest -from fatcat_tools.importers import GrobidMetadataImporter +from fatcat_tools.importers import GrobidMetadataImporter, LinePusher from fixtures import api """ @@ -15,10 +15,6 @@ side-effects. Should probably be disabled or re-written. def grobid_metadata_importer(api): yield GrobidMetadataImporter(api) -# TODO: use API to check that entities actually created... -#def test_grobid_metadata_importer_batch(grobid_metadata_importer): -# with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: -# grobid_metadata_importer.process_batch(f) def test_grobid_metadata_parse(grobid_metadata_importer): with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: @@ -30,7 +26,8 @@ def test_grobid_metadata_parse(grobid_metadata_importer): print(re.contribs) assert re.contribs[0].raw_name == "Wahyu Ary" assert re.publisher == None - assert re.extra.get('container_name') == None + if re.extra: + assert re.extra.get('container_name') == None assert len(re.refs) == 27 def test_file_metadata_parse(grobid_metadata_importer): @@ -53,13 +50,28 @@ def test_file_metadata_parse(grobid_metadata_importer): assert len(fe.release_ids) == 0 def test_grobid_metadata_importer(grobid_metadata_importer): + last_index = grobid_metadata_importer.api.get_changelog(limit=1)[0].index with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: - grobid_metadata_importer.process_source(f) + grobid_metadata_importer.bezerk_mode = True + counts = LinePusher(grobid_metadata_importer, f).run() + assert counts['insert'] == 10 + assert counts['inserted.release'] == 10 + assert counts['exists'] == 0 + assert counts['skip'] == 0 # fetch most recent editgroup - changes = grobid_metadata_importer.api.get_changelog(limit=1) - eg = changes[0].editgroup + change = grobid_metadata_importer.api.get_changelog_entry(index=last_index+1) + eg = change.editgroup assert eg.description assert "grobid" in eg.description.lower() assert eg.extra['git_rev'] assert "fatcat_tools.GrobidMetadataImporter" in eg.extra['agent'] + + with open('tests/files/example_grobid_metadata_lines.tsv', 'r') as f: + grobid_metadata_importer.reset() + grobid_metadata_importer.bezerk_mode = False + counts = LinePusher(grobid_metadata_importer, f).run() + assert counts['insert'] == 0 + assert counts['inserted.release'] == 0 + assert counts['exists'] == 10 + assert counts['skip'] == 0 |