about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-04 16:12:59 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:25 -0700
commit 271f110e5ad4091e8d683b4365bc565ae0466916 (patch)
tree e600bc81e27e4ccae6e616810f7b7d5868b40885
parent 4b3d6cb79a7182be4976aab34db251ecbcbd2665 (diff)
download sandcrawler-271f110e5ad4091e8d683b4365bc565ae0466916.tar.gz
sandcrawler-271f110e5ad4091e8d683b4365bc565ae0466916.zip
ingest tool: always require ingest type as part of 'single' command
-rwxr-xr-x python/ingest_tool.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/ingest_tool.py b/python/ingest_tool.py
index 2a7a1a2..45e1812 100755
--- a/python/ingest_tool.py
+++ b/python/ingest_tool.py
@@ -62,6 +62,9 @@ def main():
sub_single= subparsers.add_parser('single',
help="ingests a single base URL")
sub_single.set_defaults(func=run_single_ingest)
+ sub_single.add_argument('ingest_type',
+ default="pdf",
+ help="type of ingest (pdf, html, etc)")
sub_single.add_argument('--release-id',
help="(optional) existing release ident to match to")
sub_single.add_argument('--doi',
@@ -72,9 +75,6 @@ def main():
sub_single.add_argument('--no-spn2',
action='store_true',
help="don't use live web (SPNv2)")
- sub_single.add_argument('--ingest-type',
- default="pdf",
- help="type of ingest (pdf, html, etc)")
sub_single.add_argument('--html-quick-mode',
action='store_true',
help="don't fetch individual sub-resources, just use CDX")