summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r--python/fatcat_tools/importers/common.py17
1 files changed, 17 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 18594884..65976a21 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -5,16 +5,33 @@ import csv
import json
import itertools
from collections import Counter
+import pykafka
import fatcat_client
from fatcat_client.rest import ApiException
+
# from: https://docs.python.org/3/library/itertools.html
def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
args = [iter(iterable)] * n
return itertools.zip_longest(*args, fillvalue=fillvalue)
+def make_kafka_consumer(hosts, env, topic_suffix, group):
+ topic_name = "fatcat-{}.{}".format(env, topic_suffix).encode('utf-8')
+ client = pykafka.KafkaClient(hosts=hosts, broker_version="1.0.0")
+ consume_topic = client.topics[topic_name]
+ print("Consuming from kafka topic {}, group {}".format(topic_name, group))
+
+ consumer = consume_topic.get_balanced_consumer(
+ consumer_group=group.encode('utf-8'),
+ managed=True,
+ auto_commit_enable=True,
+ auto_commit_interval_ms=30000, # 30 seconds
+ compacted_topic=True,
+ )
+ return consumer
+
class FatcatImporter:
"""
Base class for fatcat importers