diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-05-03 17:35:52 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-05-03 17:35:52 -0700 |
commit | 1ec661af75f37b3ae5031851f6c452039e08503c (patch) | |
tree | bbd62c19180666d2fccf4a98481ae318bd62f892 /python | |
parent | 621fca6245a5362cead33f71e83d0003aae42cf4 (diff) | |
download | sandcrawler-1ec661af75f37b3ae5031851f6c452039e08503c.tar.gz sandcrawler-1ec661af75f37b3ae5031851f6c452039e08503c.zip |
ingest_tool: fix arg parsing
Diffstat (limited to 'python')
-rwxr-xr-x | python/ingest_tool.py | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/python/ingest_tool.py b/python/ingest_tool.py
index ac9ece1..6b59611 100755
--- a/python/ingest_tool.py
+++ b/python/ingest_tool.py
@@ -70,11 +70,11 @@ def run_file_requests_backfill(args):
     Can be used to batch re-process known files.
     """
-    grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.kafka_env)
-    pdftext_topic = "sandcrawler-{}.pdf-text".format(args.kafka_env)
-    thumbnail_topic = "sandcrawler-{}.pdf-thumbnail-180px-jpg".format(args.kafka_env)
-    xmldoc_topic = "sandcrawler-{}.xml-doc".format(args.kafka_env)
-    htmlteixml_topic = "sandcrawler-{}.html-teixml".format(args.kafka_env)
+    grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env)
+    pdftext_topic = "sandcrawler-{}.pdf-text".format(args.env)
+    thumbnail_topic = "sandcrawler-{}.pdf-thumbnail-180px-jpg".format(args.env)
+    xmldoc_topic = "sandcrawler-{}.xml-doc".format(args.env)
+    htmlteixml_topic = "sandcrawler-{}.html-teixml".format(args.env)
     grobid_sink = KafkaSink(
         kafka_hosts=args.kafka_hosts,
         produce_topic=grobid_topic,
@@ -143,6 +143,9 @@ def main():
         action="store_true",
         help="report exceptions to Sentry",
     )
+    parser.add_argument(
+        "--env", default="dev", help="environment (eg, prod, qa, dev)"
+    )
     subparsers = parser.add_subparsers()
     sub_single = subparsers.add_parser("single", help="ingests a single base URL")
@@ -212,9 +215,6 @@ def main():
         help="list of Kafka brokers (host/port) to use",
     )
     sub_file_requests_backfill.add_argument(
-        "--kafka-env", default="dev", help="Kafka topic namespace to use (eg, prod, qa, dev)"
-    )
-    sub_file_requests_backfill.add_argument(
         "--grobid-host", default="https://grobid.qa.fatcat.wiki", help="GROBID API host/port"
     )