diff options
Diffstat (limited to 'python')
| -rwxr-xr-x | python/persist_tool.py | 5 | ||||
| -rwxr-xr-x | python/sandcrawler_worker.py | 5 | 
2 files changed, 8 insertions, 2 deletions
diff --git a/python/persist_tool.py b/python/persist_tool.py
index 5cf5776..069bef7 100755
--- a/python/persist_tool.py
+++ b/python/persist_tool.py
@@ -120,6 +120,9 @@ def run_ingest_request(args):
 
 
 def run_crossref(args):
+    batch_size = 200
+    if args.parse_refs:
+        batch_size = 10
     grobid_client = GrobidClient(
         host_url=args.grobid_host,
     )
@@ -131,7 +134,7 @@ def run_crossref(args):
     pusher = JsonLinePusher(
         worker,
         args.json_file,
-        batch_size=10,
+        batch_size=batch_size,
     )
     pusher.run()
 
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index c7eca86..52d126a 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -296,6 +296,9 @@ def run_persist_ingest_file(args):
 
 
 def run_persist_crossref(args):
+    batch_size = 200
+    if args.parse_refs:
+        batch_size = 10
     grobid_client = GrobidClient(host_url=args.grobid_host)
     consume_topic = "fatcat-{}.api-crossref".format(args.env)
     worker = PersistCrossrefWorker(
@@ -310,7 +313,7 @@ def run_persist_crossref(args):
         group="persist-crossref",
         push_batches=True,
         # small batch size because doing GROBID processing
-        batch_size=20,
+        batch_size=batch_size,
     )
     pusher.run()
 
