diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-03-05 00:40:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-03-05 00:40:21 -0800 |
commit | 173e5e88de4160a63949ff6e263123c4a25b2017 (patch) | |
tree | 41e757ecd8629d6233c5b8bf01d6b5200a3314af /python | |
parent | 31f2545eb8af78cde9e4e4178489a8630aca0d09 (diff) | |
download | sandcrawler-173e5e88de4160a63949ff6e263123c4a25b2017.tar.gz sandcrawler-173e5e88de4160a63949ff6e263123c4a25b2017.zip |
ingest_tool: force-recrawl arg
Diffstat (limited to 'python')
-rwxr-xr-x | python/ingest_file.py | 5 |
1 files changed, 5 insertions, 0 deletions
```diff
diff --git a/python/ingest_file.py b/python/ingest_file.py
index ba88368..d4fdcac 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -15,6 +15,8 @@ def run_single_ingest(args):
         ext_ids=dict(doi=args.doi),
         fatcat=dict(release_ident=args.release_id),
     )
+    if args.force_recrawl:
+        request['force_recrawl'] = True
     ingester = IngestFileWorker()
     result = ingester.process(request)
     print(json.dumps(result, sort_keys=True))
@@ -46,6 +48,9 @@ def main():
         help="(optional) existing release ident to match to")
     sub_single.add_argument('--doi',
         help="(optional) existing release DOI to match to")
+    sub_single.add_argument('--force-recrawl',
+        action='store_true',
+        help="ignore GWB history and use SPNv2 to re-crawl")
     sub_single.add_argument('--type',
         default="pdf",
         help="type of ingest (pdf, html, etc)")
```