about | summary | refs | log | tree | commit | diff | stats
path: root/python/scripts
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-01-14 15:26:23 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-01-14 15:26:23 -0800
commit: 49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab (patch)
tree: 1dab8e7aa48adc73e00bbffa76fc41956e610321 /python/scripts
parent: 578c2bd737856c8cddb4589bb3cb87f1bc9d7b3b (diff)
download: sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.tar.gz
sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.zip
arabesque2ingestrequest: ingest type flag
Diffstat (limited to 'python/scripts')
-rwxr-xr-x python/scripts/arabesque2ingestrequest.py | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/scripts/arabesque2ingestrequest.py b/python/scripts/arabesque2ingestrequest.py
index 5cafdcf..fa46f10 100755
--- a/python/scripts/arabesque2ingestrequest.py
+++ b/python/scripts/arabesque2ingestrequest.py
@@ -27,7 +27,7 @@ def run(args):
request = {
'base_url': row['final_url'],
- 'ingest_type': 'pdf',
+ 'ingest_type': args.ingest_type,
'link_source': args.link_source,
'link_source_id': row['identifier'],
'ingest_request_source': args.ingest_request_source,
@@ -50,6 +50,9 @@ def main():
parser.add_argument('--extid-type',
required=True,
help="extid to encode identifier as")
+ parser.add_argument('--ingest-type',
+ default="pdf",
+ help="ingest type (pdf, html, xml, etc)")
parser.add_argument('--ingest-request-source',
default="arabesque",
help="to include in request")