From fb3be0f3a44bf8a727f66a08bded28fa24e23e2b Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Thu, 18 Apr 2019 15:28:08 -0700
Subject: arabesque import tweaks

---
 python/fatcat_import.py          | 4 ++++
 python/tests/import_arabesque.py | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index f04a63ef..cd325697 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -34,6 +34,7 @@ def run_matched(args):
 def run_arabesque_matched(args):
     ami = ArabesqueMatchImporter(args.api,
         do_updates=args.do_updates,
+        require_grobid=(not args.no_require_grobid),
         extid_type=args.extid_type,
         crawl_id=args.crawl_id,
         default_link_rel=args.default_link_rel,
@@ -171,6 +172,9 @@ def main():
     sub_arabesque_matched.add_argument('--do-updates',
         action='store_true',
         help="update pre-existing file entities if new match (instead of skipping)")
+    sub_arabesque_matched.add_argument('--no-require-grobid',
+        action='store_true',
+        help="don't require postproc_status column to be '200' (required by default)")
     sub_arabesque_matched.add_argument('--extid-type',
         default="doi",
         help="identifer type in the database (eg, 'doi', 'pmcid'")
diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py
index 516b0ec2..9d74f96c 100644
--- a/python/tests/import_arabesque.py
+++ b/python/tests/import_arabesque.py
@@ -1,7 +1,7 @@
 
 import json
 import pytest
-from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher
+from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher
 from fixtures import api
 
 
@@ -13,6 +13,10 @@ def arabesque_importer(api):
 def test_arabesque_importer_basic(arabesque_importer):
     SqlitePusher(arabesque_importer, 'tests/files/arabesque_example.sqlite3', "crawl_result").run()
 
+def test_arabesque_importer_json(arabesque_importer):
+    with open('tests/files/arabesque_example.json', 'r') as f:
+        JsonLinePusher(arabesque_importer, f).run()
+
 def test_arabesque_importer(arabesque_importer):
     last_index = arabesque_importer.api.get_changelog(limit=1)[0].index
     arabesque_importer.bezerk_mode = True
-- 
cgit v1.2.3