diff options
Diffstat (limited to 'python/tests/import_arabesque.py')
-rw-r--r-- | python/tests/import_arabesque.py | 41 |
1 files changed, 25 insertions, 16 deletions
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 6f73d817..90e6d05b 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,4 +1,3 @@
-
 import json
 
 import pytest
@@ -11,41 +10,51 @@ from fatcat_tools.importers import ArabesqueMatchImporter, JsonLinePusher, Sqlit
 def arabesque_importer(api):
     yield ArabesqueMatchImporter(api, extid_type="doi", crawl_id="DUMMY123")
 
+
 # TODO: use API to check that entities actually created...
 def test_arabesque_importer_basic(arabesque_importer):
-    SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
+    SqlitePusher(
+        arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+    ).run()
+
 
 def test_arabesque_importer_json(arabesque_importer):
-    with open('tests/files/arabesque_example.json', 'r') as f:
+    with open("tests/files/arabesque_example.json", "r") as f:
         JsonLinePusher(arabesque_importer, f).run()
 
+
 def test_arabesque_importer(arabesque_importer):
     last_index = arabesque_importer.api.get_changelog(limit=1)[0].index
     arabesque_importer.bezerk_mode = True
-    counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
-    assert counts['insert'] == 1
-    assert counts['exists'] == 0
-    assert counts['skip'] == 490
+    counts = SqlitePusher(
+        arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+    ).run()
+    assert counts["insert"] == 1
+    assert counts["exists"] == 0
+    assert counts["skip"] == 490
 
     # fetch most recent editgroup
-    change = arabesque_importer.api.get_changelog_entry(index=last_index+1)
+    change = arabesque_importer.api.get_changelog_entry(index=last_index + 1)
     eg = change.editgroup
     assert eg.description
     assert "identifier/url seedlist" in eg.description.lower()
-    assert eg.extra['git_rev']
-    assert eg.extra['crawl_id'] == "DUMMY123"
-    assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra['agent']
+    assert eg.extra["git_rev"]
+    assert eg.extra["crawl_id"] == "DUMMY123"
+    assert "fatcat_tools.ArabesqueMatchImporter" in eg.extra["agent"]
 
     # re-insert; should skip
     arabesque_importer.reset()
     arabesque_importer.bezerk_mode = False
-    counts = SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
-    assert counts['insert'] == 0
-    assert counts['exists'] == 1
-    assert counts['skip'] == 490
+    counts = SqlitePusher(
+        arabesque_importer, "tests/files/arabesque_example.sqlite3", "crawl_result"
+    ).run()
+    assert counts["insert"] == 0
+    assert counts["exists"] == 1
+    assert counts["skip"] == 490
+
 
 def test_arabesque_dict_parse(arabesque_importer):
-    with open('tests/files/arabesque_example.json', 'r') as f:
+    with open("tests/files/arabesque_example.json", "r") as f:
         raw = json.loads(f.readline())
         f = arabesque_importer.parse_record(raw)
         assert f.sha1 == "bdd78be55800bb1c9a5e47005bac5e4124793c7b"