aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-03-19 16:16:04 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-03-19 16:16:04 -0700
commit 2ede359095660b8b0906cd26fe8eca2a6f429010 (patch)
tree 92b12823502201939301d8541c1e91281d32cb7f /python
parent 88f337f2cc40824ed3eaf32b1fec17c3b053bfdf (diff)
download sandcrawler-2ede359095660b8b0906cd26fe8eca2a6f429010.tar.gz
sandcrawler-2ede359095660b8b0906cd26fe8eca2a6f429010.zip
skip-db option also for worker
Diffstat (limited to 'python')
-rwxr-xr-x python/sandcrawler_worker.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 0ba4d03..5720f48 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -58,6 +58,7 @@ def run_persist_grobid(args):
s3_access_key=args.s3_access_key,
s3_secret_key=args.s3_secret_key,
s3_only=args.s3_only,
+ db_only=args.db_only,
)
pusher = KafkaJsonPusher(
worker=worker,
@@ -173,6 +174,9 @@ def main():
sub_persist_grobid.add_argument('--s3-only',
action='store_true',
help="only upload TEI-XML to S3 (don't write to database)")
+ sub_persist_grobid.add_argument('--db-only',
+ action='store_true',
+ help="only write status to database (don't upload TEI-XML to S3)")
sub_persist_grobid.set_defaults(func=run_persist_grobid)
sub_persist_pdftrio = subparsers.add_parser('persist-pdftrio',