about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-10-02 18:00:33 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-10-02 18:00:33 -0700
commit 5c2f5b575e88c3714958634969af3ef403db0ee8 (patch)
tree 296d3e926a8d08b06023050efa9250dc2b5ed2ee /python
parent 9092f027004095f5cacb5dc870737751397872cc (diff)
download sandcrawler-5c2f5b575e88c3714958634969af3ef403db0ee8.tar.gz
sandcrawler-5c2f5b575e88c3714958634969af3ef403db0ee8.zip
grobid_tool.py example usage in docstring
Diffstat (limited to 'python')
-rwxr-xr-x python/grobid_tool.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/grobid_tool.py b/python/grobid_tool.py
index 352c2fb..3533f43 100755
--- a/python/grobid_tool.py
+++ b/python/grobid_tool.py
@@ -3,6 +3,12 @@
"""
These are generally for running one-off tasks from the command line. Output
might go to stdout, or might go to Kafka topic.
+
+Example of large parallel run, locally:
+
+ cat /srv/sandcrawler/tasks/ungrobided.2019-09-23.json \
+ | parallel -j6 --pipe \
+ ./grobid_tool.py --kafka-env qa --kafka-hosts wbgrp-svc263.us.archive.org:9092,wbgrp-svc284.us.archive.org:9092,wbgrp-svc285.us.archive.org:9092 --kafka-mode --grobid-host http://localhost:8070 -j10 extract-json -
"""
import sys