about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-05-03 17:35:52 -0700
committer Bryan Newbold <bnewbold@archive.org> 2022-05-03 17:35:52 -0700
commit 1ec661af75f37b3ae5031851f6c452039e08503c (patch)
tree bbd62c19180666d2fccf4a98481ae318bd62f892 /python
parent 621fca6245a5362cead33f71e83d0003aae42cf4 (diff)
download sandcrawler-1ec661af75f37b3ae5031851f6c452039e08503c.tar.gz
sandcrawler-1ec661af75f37b3ae5031851f6c452039e08503c.zip
ingest_tool: fix arg parsing
Diffstat (limited to 'python')
-rwxr-xr-x python/ingest_tool.py 16
1 files changed, 8 insertions, 8 deletions
diff --git a/python/ingest_tool.py b/python/ingest_tool.py
index ac9ece1..6b59611 100755
--- a/python/ingest_tool.py
+++ b/python/ingest_tool.py
@@ -70,11 +70,11 @@ def run_file_requests_backfill(args):
Can be used to batch re-process known files.
"""
- grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.kafka_env)
- pdftext_topic = "sandcrawler-{}.pdf-text".format(args.kafka_env)
- thumbnail_topic = "sandcrawler-{}.pdf-thumbnail-180px-jpg".format(args.kafka_env)
- xmldoc_topic = "sandcrawler-{}.xml-doc".format(args.kafka_env)
- htmlteixml_topic = "sandcrawler-{}.html-teixml".format(args.kafka_env)
+ grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env)
+ pdftext_topic = "sandcrawler-{}.pdf-text".format(args.env)
+ thumbnail_topic = "sandcrawler-{}.pdf-thumbnail-180px-jpg".format(args.env)
+ xmldoc_topic = "sandcrawler-{}.xml-doc".format(args.env)
+ htmlteixml_topic = "sandcrawler-{}.html-teixml".format(args.env)
grobid_sink = KafkaSink(
kafka_hosts=args.kafka_hosts,
produce_topic=grobid_topic,
@@ -143,6 +143,9 @@ def main():
action="store_true",
help="report exceptions to Sentry",
)
+ parser.add_argument(
+ "--env", default="dev", help="environment (eg, prod, qa, dev)"
+ )
subparsers = parser.add_subparsers()
sub_single = subparsers.add_parser("single", help="ingests a single base URL")
@@ -212,9 +215,6 @@ def main():
help="list of Kafka brokers (host/port) to use",
)
sub_file_requests_backfill.add_argument(
- "--kafka-env", default="dev", help="Kafka topic namespace to use (eg, prod, qa, dev)"
- )
- sub_file_requests_backfill.add_argument(
"--grobid-host", default="https://grobid.qa.fatcat.wiki", help="GROBID API host/port"
)