diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-01-14 15:26:23 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-14 15:26:23 -0800 | 
| commit | 49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab (patch) | |
| tree | 1dab8e7aa48adc73e00bbffa76fc41956e610321 /python | |
| parent | 578c2bd737856c8cddb4589bb3cb87f1bc9d7b3b (diff) | |
| download | sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.tar.gz sandcrawler-49c4f4a4050a76e772f6ef9bf9ca544e2d54e2ab.zip | |
arabesque2ingestrequest: ingest type flag
Diffstat (limited to 'python')
| -rwxr-xr-x | python/scripts/arabesque2ingestrequest.py | 5 | 
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/python/scripts/arabesque2ingestrequest.py b/python/scripts/arabesque2ingestrequest.py
index 5cafdcf..fa46f10 100755
--- a/python/scripts/arabesque2ingestrequest.py
+++ b/python/scripts/arabesque2ingestrequest.py
@@ -27,7 +27,7 @@ def run(args):
         request = {
             'base_url': row['final_url'],
-            'ingest_type': 'pdf',
+            'ingest_type': args.ingest_type,
             'link_source': args.link_source,
             'link_source_id': row['identifier'],
             'ingest_request_source': args.ingest_request_source,
@@ -50,6 +50,9 @@ def main():
     parser.add_argument('--extid-type',
         required=True,
         help="extid to encode identifier as")
+    parser.add_argument('--ingest-type',
+        default="pdf",
+        help="ingest type (pdf, html, xml, etc)")
     parser.add_argument('--ingest-request-source',
         default="arabesque",
         help="to include in request")
```
