about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-08 19:25:31 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-09 16:30:12 -0800
commit 318bcf9dbc244a1130b74252b7842cc4eb954bfd (patch)
tree 02aaddbac899a18b271b84774919fc0833c7d6dd /python
parent 23760a1f67b8cae4cdd9d40c909a6bb2f485e9e8 (diff)
download sandcrawler-318bcf9dbc244a1130b74252b7842cc4eb954bfd.tar.gz
sandcrawler-318bcf9dbc244a1130b74252b7842cc4eb954bfd.zip
cli: allow multiple ingest single types
Diffstat (limited to 'python')
-rwxr-xr-x python/ingest_file.py 7
1 files changed, 4 insertions, 3 deletions
diff --git a/python/ingest_file.py b/python/ingest_file.py
index 460b576..ba88368 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -10,6 +10,7 @@ from sandcrawler.ingest import IngestFileRequestHandler, IngestFileWorker
def run_single_ingest(args):
request = dict(
+ ingest_type=args.type,
base_url=args.url,
ext_ids=dict(doi=args.doi),
fatcat=dict(release_ident=args.release_id),
@@ -36,9 +37,6 @@ def run_api(args):
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser.add_argument('--api-host-url',
- default="http://localhost:9411/v0",
- help="fatcat API host/port to use")
subparsers = parser.add_subparsers()
sub_single= subparsers.add_parser('single',
@@ -48,6 +46,9 @@ def main():
help="(optional) existing release ident to match to")
sub_single.add_argument('--doi',
help="(optional) existing release DOI to match to")
+ sub_single.add_argument('--type',
+ default="pdf",
+ help="type of ingest (pdf, html, etc)")
sub_single.add_argument('url',
help="URL of paper to fetch")