diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-01 17:54:42 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-04 17:19:52 -0700 |
commit | da87108eecfd94e02d949a4fe4fc7998a489b934 (patch) | |
tree | 75b93602d3ec49615234ade1fb6d60abe0c21020 /python/persist_tool.py | |
parent | 59af5ddd0a9587eaf53b4f6965c0d6290295ce55 (diff) | |
download | sandcrawler-da87108eecfd94e02d949a4fe4fc7998a489b934.tar.gz sandcrawler-da87108eecfd94e02d949a4fe4fc7998a489b934.zip |
crossref persist: make GROBID ref parsing an option (not default)
Diffstat (limited to 'python/persist_tool.py')
-rwxr-xr-x | python/persist_tool.py | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/python/persist_tool.py b/python/persist_tool.py
index a4f9812..5cf5776 100755
--- a/python/persist_tool.py
+++ b/python/persist_tool.py
@@ -126,6 +126,7 @@ def run_crossref(args):
     worker = PersistCrossrefWorker(
         db_url=args.db_url,
         grobid_client=grobid_client,
+        parse_refs=args.parse_refs,
     )
     pusher = JsonLinePusher(
         worker,
@@ -267,6 +268,11 @@ def main():
     sub_crossref.add_argument(
         "--grobid-host", default="https://grobid.qa.fatcat.wiki", help="GROBID API host/port"
     )
+    sub_crossref.add_argument(
+        "--parse-refs",
+        action="store_true",
+        help="use GROBID to parse any unstructured references (default is to not)",
+    )
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):