diff options
Diffstat (limited to 'python/sandcrawler_worker.py')
-rwxr-xr-x | python/sandcrawler_worker.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index c7eca86..52d126a 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -296,6 +296,9 @@ def run_persist_ingest_file(args): def run_persist_crossref(args): + batch_size = 200 + if args.parse_refs: + batch_size = 10 grobid_client = GrobidClient(host_url=args.grobid_host) consume_topic = "fatcat-{}.api-crossref".format(args.env) worker = PersistCrossrefWorker( @@ -310,7 +313,7 @@ def run_persist_crossref(args): group="persist-crossref", push_batches=True, # small batch size because doing GROBID processing - batch_size=20, + batch_size=batch_size, ) pusher.run() |