diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-14 15:26:23 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-14 15:26:23 -0800 |
commit | 49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab (patch) | |
tree | 1dab8e7aa48adc73e00bbffa76fc41956e610321 /python/scripts | |
parent | 578c2bd737856c8cddb4589bb3cb87f1bc9d7b3b (diff) | |
download | sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.tar.gz sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.zip |
arabesque2ingestrequest: ingest type flag
Diffstat (limited to 'python/scripts')
-rwxr-xr-x | python/scripts/arabesque2ingestrequest.py | 5 |
1 files changed, 4 insertions, 1 deletions
```diff
diff --git a/python/scripts/arabesque2ingestrequest.py b/python/scripts/arabesque2ingestrequest.py
index 5cafdcf..fa46f10 100755
--- a/python/scripts/arabesque2ingestrequest.py
+++ b/python/scripts/arabesque2ingestrequest.py
@@ -27,7 +27,7 @@ def run(args):
 
         request = {
             'base_url': row['final_url'],
-            'ingest_type': 'pdf',
+            'ingest_type': args.ingest_type,
             'link_source': args.link_source,
             'link_source_id': row['identifier'],
             'ingest_request_source': args.ingest_request_source,
@@ -50,6 +50,9 @@ def main():
     parser.add_argument('--extid-type',
         required=True,
         help="extid to encode identifier as")
+    parser.add_argument('--ingest-type',
+        default="pdf",
+        help="ingest type (pdf, html, xml, etc)")
     parser.add_argument('--ingest-request-source',
         default="arabesque",
         help="to include in request")
```