diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:28:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:28:13 -0700 |
commit | fb3be0f3a44bf8a727f66a08bded28fa24e23e2b (patch) | |
tree | b4451b3f12a23fa425d7973f347a6685bbde9f10 | |
parent | fb53198956843954a981dbbe83b4727b25ae6427 (diff) | |
download | fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.tar.gz fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.zip |
arabesque import tweaks
-rwxr-xr-x | python/fatcat_import.py | 4 | ||||
-rw-r--r-- | python/tests/import_arabesque.py | 6 |
2 files changed, 9 insertions, 1 deletion
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index f04a63ef..cd325697 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -34,6 +34,7 @@ def run_matched(args):
 def run_arabesque_matched(args):
     ami = ArabesqueMatchImporter(args.api,
         do_updates=args.do_updates,
+        require_grobid=(not args.no_require_grobid),
         extid_type=args.extid_type,
         crawl_id=args.crawl_id,
         default_link_rel=args.default_link_rel,
@@ -171,6 +172,9 @@ def main():
     sub_arabesque_matched.add_argument('--do-updates',
         action='store_true',
         help="update pre-existing file entities if new match (instead of skipping)")
+    sub_arabesque_matched.add_argument('--no-require-grobid',
+        action='store_true',
+        help="whether postproc_status column must be '200'")
     sub_arabesque_matched.add_argument('--extid-type',
         default="doi",
         help="identifer type in the database (eg, 'doi', 'pmcid'")
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 516b0ec2..9d74f96c 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,7 +1,7 @@
 
 import json
 import pytest
-from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher
+from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher
 from fixtures import api
 
 
@@ -13,6 +13,10 @@ def arabesque_importer(api):
 def test_arabesque_importer_basic(arabesque_importer):
     SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
 
+def test_arabesque_importer_json(arabesque_importer):
+    with open('tests/files/arabesque_example.json', 'r') as f:
+        JsonLinePusher(arabesque_importer, f).run()
+
 def test_arabesque_importer(arabesque_importer):
     last_index = arabesque_importer.api.get_changelog(limit=1)[0].index
     arabesque_importer.bezerk_mode = True
```