From fb3be0f3a44bf8a727f66a08bded28fa24e23e2b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 18 Apr 2019 15:28:08 -0700 Subject: arabesque import tweaks --- python/fatcat_import.py | 4 ++++ python/tests/import_arabesque.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/fatcat_import.py b/python/fatcat_import.py index f04a63ef..cd325697 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -34,6 +34,7 @@ def run_matched(args): def run_arabesque_matched(args): ami = ArabesqueMatchImporter(args.api, do_updates=args.do_updates, + require_grobid=(not args.no_require_grobid), extid_type=args.extid_type, crawl_id=args.crawl_id, default_link_rel=args.default_link_rel, @@ -171,6 +172,9 @@ def main(): sub_arabesque_matched.add_argument('--do-updates', action='store_true', help="update pre-existing file entities if new match (instead of skipping)") + sub_arabesque_matched.add_argument('--no-require-grobid', + action='store_true', + help="whether postproc_status column must be '200'") sub_arabesque_matched.add_argument('--extid-type', default="doi", help="identifer type in the database (eg, 'doi', 'pmcid'") diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py index 516b0ec2..9d74f96c 100644 --- a/python/tests/import_arabesque.py +++ b/python/tests/import_arabesque.py @@ -1,7 +1,7 @@ import json import pytest -from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher +from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher from fixtures import api @@ -13,6 +13,10 @@ def arabesque_importer(api): def test_arabesque_importer_basic(arabesque_importer): SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run() +def test_arabesque_importer_json(arabesque_importer): + with open('tests/files/arabesque_example.json', 'r') as f: + JsonLinePusher(arabesque_importer, f).run() + def test_arabesque_importer(arabesque_importer): last_index = arabesque_importer.api.get_changelog(limit=1)[0].index arabesque_importer.bezerk_mode = True -- cgit v1.2.3