about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-03-10 12:55:47 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-03-10 12:55:47 +0100
commit: d18942d1ab4d394bdb275bcf9eb82d1cba814775 (patch)
tree: fef87b46af1676680830c80a2224ff1889852b93 /python/fatcat_tools
parent: 4fcdc6d60260ca79693fd7d1ce2ae03065d5ef0c (diff)
download: fatcat-d18942d1ab4d394bdb275bcf9eb82d1cba814775.tar.gz
download: fatcat-d18942d1ab4d394bdb275bcf9eb82d1cba814775.zip
common: use smaller batch size since XML parsing may be slow
Address the Kafka tradeoff between long and short time-outs. Shorter time-outs would facilitate "consumer group re-balances and other consumer group state changes [...] in a reasonable human time-frame."
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/common.py | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 1cb5529e..5f5c46b8 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -735,7 +735,7 @@ class KafkaBs4XmlPusher(RecordPusher):
kafka_namespace=kwargs.get('kafka_namespace', 'fatcat')
)
self.poll_interval = kwargs.get('poll_interval', 5.0)
- self.consume_batch_size = kwargs.get('consume_batch_size', 100)
+ self.consume_batch_size = kwargs.get('consume_batch_size', 25)
def run(self):
count = 0