aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-03-05 00:40:21 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-03-05 00:40:21 -0800
commit 173e5e88de4160a63949ff6e263123c4a25b2017 (patch)
tree 41e757ecd8629d6233c5b8bf01d6b5200a3314af /python
parent 31f2545eb8af78cde9e4e4178489a8630aca0d09 (diff)
download sandcrawler-173e5e88de4160a63949ff6e263123c4a25b2017.tar.gz
sandcrawler-173e5e88de4160a63949ff6e263123c4a25b2017.zip
ingest_tool: force-recrawl arg
Diffstat (limited to 'python')
-rwxr-xr-x python/ingest_file.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/ingest_file.py b/python/ingest_file.py
index ba88368..d4fdcac 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -15,6 +15,8 @@ def run_single_ingest(args):
ext_ids=dict(doi=args.doi),
fatcat=dict(release_ident=args.release_id),
)
+ if args.force_recrawl:
+ request['force_recrawl'] = True
ingester = IngestFileWorker()
result = ingester.process(request)
print(json.dumps(result, sort_keys=True))
@@ -46,6 +48,9 @@ def main():
help="(optional) existing release ident to match to")
sub_single.add_argument('--doi',
help="(optional) existing release DOI to match to")
+ sub_single.add_argument('--force-recrawl',
+ action='store_true',
+ help="ignore GWB history and use SPNv2 to re-crawl")
sub_single.add_argument('--type',
default="pdf",
help="type of ingest (pdf, html, etc)")