From 173e5e88de4160a63949ff6e263123c4a25b2017 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 5 Mar 2020 00:40:21 -0800
Subject: ingest_tool: force-recrawl arg

---
 python/ingest_file.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/ingest_file.py b/python/ingest_file.py
index ba88368..d4fdcac 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -15,6 +15,8 @@ def run_single_ingest(args):
         ext_ids=dict(doi=args.doi),
         fatcat=dict(release_ident=args.release_id),
     )
+    if args.force_recrawl:
+        request['force_recrawl'] = True
     ingester = IngestFileWorker()
     result = ingester.process(request)
     print(json.dumps(result, sort_keys=True))
@@ -46,6 +48,9 @@ def main():
         help="(optional) existing release ident to match to")
     sub_single.add_argument('--doi',
         help="(optional) existing release DOI to match to")
+    sub_single.add_argument('--force-recrawl',
+        action='store_true',
+        help="ignore GWB history and use SPNv2 to re-crawl")
     sub_single.add_argument('--type',
         default="pdf",
         help="type of ingest (pdf, html, etc)")
-- 
cgit v1.2.3