aboutsummaryrefslogtreecommitdiffstats
path: root/python/persist_tool.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-01 17:54:42 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-11-04 17:19:52 -0700
commit da87108eecfd94e02d949a4fe4fc7998a489b934 (patch)
tree 75b93602d3ec49615234ade1fb6d60abe0c21020 /python/persist_tool.py
parent 59af5ddd0a9587eaf53b4f6965c0d6290295ce55 (diff)
download sandcrawler-da87108eecfd94e02d949a4fe4fc7998a489b934.tar.gz
sandcrawler-da87108eecfd94e02d949a4fe4fc7998a489b934.zip
crossref persist: make GROBID ref parsing an option (not default)
Diffstat (limited to 'python/persist_tool.py')
-rwxr-xr-x python/persist_tool.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/persist_tool.py b/python/persist_tool.py
index a4f9812..5cf5776 100755
--- a/python/persist_tool.py
+++ b/python/persist_tool.py
@@ -126,6 +126,7 @@ def run_crossref(args):
worker = PersistCrossrefWorker(
db_url=args.db_url,
grobid_client=grobid_client,
+ parse_refs=args.parse_refs,
)
pusher = JsonLinePusher(
worker,
@@ -267,6 +268,11 @@ def main():
sub_crossref.add_argument(
"--grobid-host", default="https://grobid.qa.fatcat.wiki", help="GROBID API host/port"
)
+ sub_crossref.add_argument(
+ "--parse-refs",
+ action="store_true",
+ help="use GROBID to parse any unstructured references (default is to not)",
+ )
args = parser.parse_args()
if not args.__dict__.get("func"):