diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:28:08 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:28:13 -0700 | 
| commit | fb3be0f3a44bf8a727f66a08bded28fa24e23e2b (patch) | |
| tree | b4451b3f12a23fa425d7973f347a6685bbde9f10 | |
| parent | fb53198956843954a981dbbe83b4727b25ae6427 (diff) | |
| download | fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.tar.gz fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.zip | |
arabesque import tweaks
| -rwxr-xr-x | python/fatcat_import.py | 4 | ||||
| -rw-r--r-- | python/tests/import_arabesque.py | 6 | 
2 files changed, 9 insertions, 1 deletion
| diff --git a/python/fatcat_import.py b/python/fatcat_import.py index f04a63ef..cd325697 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -34,6 +34,7 @@ def run_matched(args):  def run_arabesque_matched(args):      ami = ArabesqueMatchImporter(args.api,          do_updates=args.do_updates, +        require_grobid=(not args.no_require_grobid),          extid_type=args.extid_type,          crawl_id=args.crawl_id,          default_link_rel=args.default_link_rel, @@ -171,6 +172,9 @@ def main():      sub_arabesque_matched.add_argument('--do-updates',          action='store_true',          help="update pre-existing file entities if new match (instead of skipping)") +    sub_arabesque_matched.add_argument('--no-require-grobid', +        action='store_true', +        help="whether postproc_status column must be '200'")      sub_arabesque_matched.add_argument('--extid-type',          default="doi",          help="identifer type in the database (eg, 'doi', 'pmcid'") diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py index 516b0ec2..9d74f96c 100644 --- a/python/tests/import_arabesque.py +++ b/python/tests/import_arabesque.py @@ -1,7 +1,7 @@  import json  import pytest -from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher +from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher  from fixtures import api @@ -13,6 +13,10 @@ def arabesque_importer(api):  def test_arabesque_importer_basic(arabesque_importer):      SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run() +def test_arabesque_importer_json(arabesque_importer): +    with open('tests/files/arabesque_example.json', 'r') as f: +        JsonLinePusher(arabesque_importer, f).run() +  def test_arabesque_importer(arabesque_importer):      last_index = arabesque_importer.api.get_changelog(limit=1)[0].index      arabesque_importer.bezerk_mode = True | 
