aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler_worker.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:52:41 -0800
committer Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:52:43 -0800
commit 0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4 (patch)
tree 6a421ced28f7cd947dd09215e90811a83e8b3171 /python/sandcrawler_worker.py
parent 59776d4175faa3d0b7ff5f25456620b2a84d738e (diff)
download sandcrawler-0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4.tar.gz
sandcrawler-0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4.zip
refactor: improve argparse usage
use ArgumentDefaultsHelpFormatter and add help messages to all sub-commands
Diffstat (limited to 'python/sandcrawler_worker.py')
-rwxr-xr-x python/sandcrawler_worker.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 81aef5b..f314218 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -47,7 +47,8 @@ def run_ingest_file(args):
pusher.run()
def main():
- parser = argparse.ArgumentParser()
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--kafka-hosts',
default="localhost:9092",
help="list of Kafka brokers (host/port) to use")
@@ -59,13 +60,16 @@ def main():
help="GROBID API host/port")
subparsers = parser.add_subparsers()
- sub_grobid_extract = subparsers.add_parser('grobid-extract')
+ sub_grobid_extract = subparsers.add_parser('grobid-extract',
+ help="daemon that consumes CDX JSON objects from Kafka, extracts, pushes to Kafka")
sub_grobid_extract.set_defaults(func=run_grobid_extract)
- sub_grobid_persist = subparsers.add_parser('grobid-persist')
+ sub_grobid_persist = subparsers.add_parser('grobid-persist',
+ help="daemon that consumes GROBID output from Kafka and pushes to minio and postgres")
sub_grobid_persist.set_defaults(func=run_grobid_persist)
- sub_ingest_file = subparsers.add_parser('ingest-file')
+ sub_ingest_file = subparsers.add_parser('ingest-file',
+ help="daemon that consumes requests from Kafka, ingests, pushes results to Kafka")
sub_ingest_file.set_defaults(func=run_ingest_file)
args = parser.parse_args()