about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-04-18 15:28:08 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-04-18 15:28:13 -0700
commit fb3be0f3a44bf8a727f66a08bded28fa24e23e2b (patch)
tree b4451b3f12a23fa425d7973f347a6685bbde9f10
parent fb53198956843954a981dbbe83b4727b25ae6427 (diff)
download fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.tar.gz
fatcat-fb3be0f3a44bf8a727f66a08bded28fa24e23e2b.zip
arabesque import tweaks
-rwxr-xr-x python/fatcat_import.py 4
-rw-r--r-- python/tests/import_arabesque.py 6
2 files changed, 9 insertions, 1 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index f04a63ef..cd325697 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -34,6 +34,7 @@ def run_matched(args):
def run_arabesque_matched(args):
ami = ArabesqueMatchImporter(args.api,
do_updates=args.do_updates,
+ require_grobid=(not args.no_require_grobid),
extid_type=args.extid_type,
crawl_id=args.crawl_id,
default_link_rel=args.default_link_rel,
@@ -171,6 +172,9 @@ def main():
sub_arabesque_matched.add_argument('--do-updates',
action='store_true',
help="update pre-existing file entities if new match (instead of skipping)")
+ sub_arabesque_matched.add_argument('--no-require-grobid',
+ action='store_true',
+ help="whether postproc_status column must be '200'")
sub_arabesque_matched.add_argument('--extid-type',
default="doi",
help="identifer type in the database (eg, 'doi', 'pmcid'")
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 516b0ec2..9d74f96c 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,7 +1,7 @@
import json
import pytest
-from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher
+from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher
from fixtures import api
@@ -13,6 +13,10 @@ def arabesque_importer(api):
def test_arabesque_importer_basic(arabesque_importer):
SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
+def test_arabesque_importer_json(arabesque_importer):
+ with open('tests/files/arabesque_example.json', 'r') as f:
+ JsonLinePusher(arabesque_importer, f).run()
+
def test_arabesque_importer(arabesque_importer):
last_index = arabesque_importer.api.get_changelog(limit=1)[0].index
arabesque_importer.bezerk_mode = True