aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-11-13 23:48:45 -0800
committerBryan Newbold <bnewbold@robocracy.org>2018-11-13 23:48:47 -0800
commit7634f6ecf2361b1cb1cafd4e27fd1fb84d81d130 (patch)
tree69b18860ed4188c5169e9d9cb174355966b6f7de /python/fatcat_tools
parent7edae5c9d2267ba5e381ecbf00a7c3f7dacf4194 (diff)
downloadfatcat-7634f6ecf2361b1cb1cafd4e27fd1fb84d81d130.tar.gz
fatcat-7634f6ecf2361b1cb1cafd4e27fd1fb84d81d130.zip
switch to auto consumer offset updates
This is the classic/correct way to do consumer group updates for higher throughput, when "at least once" semantics are acceptible (as they are here; double processing should be safe/fine).
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/workers/changelog.py7
-rw-r--r--python/fatcat_tools/workers/elastic.py6
2 files changed, 11 insertions, 2 deletions
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 92bb8bdd..6319da2f 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -102,12 +102,17 @@ class FatcatEntityUpdatesWorker(FatcatWorker):
managed=True,
auto_offset_reset=OffsetType.LATEST,
reset_offset_on_start=False,
+ fetch_message_max_bytes=4000000, # up to ~4MBytes
+ auto_commit_enable=True,
+ auto_commit_interval_ms=30000, # 30 seconds
+ compacted_topic=True,
)
with release_topic.get_sync_producer() as producer:
for msg in consumer:
cle = json.loads(msg.value.decode('utf-8'))
#print(cle)
+ print("processing changelog index {}".format(cle['index']))
release_edits = cle['editgroup']['edits']['releases']
for re in release_edits:
ident = re['ident']
@@ -118,5 +123,5 @@ class FatcatEntityUpdatesWorker(FatcatWorker):
partition_key=ident.encode('utf-8'),
timestamp=None,
)
- consumer.commit_offsets()
+ #consumer.commit_offsets()
diff --git a/python/fatcat_tools/workers/elastic.py b/python/fatcat_tools/workers/elastic.py
index 119a6a26..3a75a1b3 100644
--- a/python/fatcat_tools/workers/elastic.py
+++ b/python/fatcat_tools/workers/elastic.py
@@ -32,6 +32,10 @@ class FatcatElasticReleaseWorker(FatcatWorker):
consumer = consume_topic.get_balanced_consumer(
consumer_group=self.consumer_group,
managed=True,
+ fetch_message_max_bytes=4000000, # up to ~4MBytes
+ auto_commit_enable=True,
+ auto_commit_interval_ms=30000, # 30 seconds
+ compacted_topic=True,
)
for msg in consumer:
@@ -45,4 +49,4 @@ class FatcatElasticReleaseWorker(FatcatWorker):
print("Updating document: {}".format(elastic_endpoint))
resp = requests.post(elastic_endpoint, json=release_elastic_dict(release))
assert resp.status_code in (200, 201)
- consumer.commit_offsets()
+ #consumer.commit_offsets()