From 5c2f5b575e88c3714958634969af3ef403db0ee8 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 2 Oct 2019 18:00:33 -0700
Subject: grobid_tool.py example usage in docstring

---
 python/grobid_tool.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/grobid_tool.py b/python/grobid_tool.py
index 352c2fb..3533f43 100755
--- a/python/grobid_tool.py
+++ b/python/grobid_tool.py
@@ -3,6 +3,12 @@
 """
 These are generally for running one-off tasks from the command line. Output
 might go to stdout, or might go to Kafka topic.
+
+Example of large parallel run, locally:
+
+    cat /srv/sandcrawler/tasks/ungrobided.2019-09-23.json \
+    | parallel -j6 --pipe \
+    ./grobid_tool.py --kafka-env qa --kafka-hosts wbgrp-svc263.us.archive.org:9092,wbgrp-svc284.us.archive.org:9092,wbgrp-svc285.us.archive.org:9092 --kafka-mode --grobid-host http://localhost:8070 -j10 extract-json -
 """
 
 import sys
-- 
cgit v1.2.3