about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-11-21 17:30:31 -0800
committer Bryan Newbold <bnewbold@archive.org> 2018-11-21 17:30:31 -0800
commit 83cb9717d4790e7540308d179f59723633157d07 (patch)
tree ed78ad907873deb3c0990878142aabeb0e1ed73e /python
parent 35686c9f5fc671bd31e8fd605c401a4377b30910 (diff)
download sandcrawler-83cb9717d4790e7540308d179f59723633157d07.tar.gz
sandcrawler-83cb9717d4790e7540308d179f59723633157d07.zip
fix kafka grobid command line topic parsing
Diffstat (limited to 'python')
-rwxr-xr-x python/kafka_grobid.py 4
-rwxr-xr-x python/kafka_grobid_hbase.py 8
2 files changed, 9 insertions, 3 deletions
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index ada7264..d48c175 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -261,10 +261,10 @@ def main():
default="qa",
help="eg, 'qa' or 'prod'")
parser.add_argument('--consume-topic',
- default="sandcrawler-qa.ungrobided",
+ default=None,
help="Kafka topic to consume from")
parser.add_argument('--produce-topic',
- default="sandcrawler-qa.grobid-output",
+ default=None,
help="Kafka topic to produce to")
parser.add_argument('--grobid-uri',
type=str,
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index f884558..5241920 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -153,8 +153,11 @@ def main():
parser.add_argument('--kafka-hosts',
default="localhost:9092",
help="list of Kafka brokers (host/port) to use")
+ parser.add_argument('--kafka-env',
+ default="qa",
+ help="eg, 'qa' or 'prod'")
parser.add_argument('--consume-topic',
- default="sandcrawler-qa.grobid-output",
+ default=None,
help="Kafka topic to consume from")
parser.add_argument('--hbase-table',
type=str,
@@ -166,6 +169,9 @@ def main():
help='HBase thrift API host to connect to')
args = parser.parse_args()
+ if args.consume_topic is None:
+ args.consume_topic = "sandcrawler-{}.ungrobided".format(args.kafka_env)
+
worker = KafkaGrobidHbaseWorker(**args.__dict__)
worker.run()