diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-08 19:25:31 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 16:30:12 -0800 |
commit | 318bcf9dbc244a1130b74252b7842cc4eb954bfd (patch) | |
tree | 02aaddbac899a18b271b84774919fc0833c7d6dd | |
parent | 23760a1f67b8cae4cdd9d40c909a6bb2f485e9e8 (diff) | |
download | sandcrawler-318bcf9dbc244a1130b74252b7842cc4eb954bfd.tar.gz sandcrawler-318bcf9dbc244a1130b74252b7842cc4eb954bfd.zip |
cli: allow multiple ingest single types
-rwxr-xr-x | python/ingest_file.py | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/python/ingest_file.py b/python/ingest_file.py
index 460b576..ba88368 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -10,6 +10,7 @@ from sandcrawler.ingest import IngestFileRequestHandler, IngestFileWorker
 
 def run_single_ingest(args):
     request = dict(
+        ingest_type=args.type,
         base_url=args.url,
         ext_ids=dict(doi=args.doi),
         fatcat=dict(release_ident=args.release_id),
@@ -36,9 +37,6 @@ def run_api(args):
 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument('--api-host-url',
-        default="http://localhost:9411/v0",
-        help="fatcat API host/port to use")
     subparsers = parser.add_subparsers()
 
     sub_single= subparsers.add_parser('single',
@@ -48,6 +46,9 @@ def main():
         help="(optional) existing release ident to match to")
     sub_single.add_argument('--doi',
         help="(optional) existing release DOI to match to")
+    sub_single.add_argument('--type',
+        default="pdf",
+        help="type of ingest (pdf, html, etc)")
     sub_single.add_argument('url',
         help="URL of paper to fetch")
 
```