summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/elastic_workers.py
blob: 3d2e9c39f03437613cae31efddd3ed3791916f06 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

import json
import time
import requests
from fatcat.worker_common import FatcatWorker
from fatcat_client.models import ReleaseEntity
from fatcat.entity_helpers import *
from pykafka.common import OffsetType


class FatcatElasticReleaseWorker(FatcatWorker):
    """
    Consumes from release-updates topic and pushes into (presumably local)
    elasticsearch.

    Uses a consumer group to manage offset.
    """

    def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
            elastic_backend="http://localhost:9200", elastic_index="fatcat"):
        """
        kafka_hosts: passed through to FatcatWorker
        consume_topic: name of the Kafka topic to read release JSON from
        poll_interval: accepted, but not used by this class
        offset: accepted, but not used by this class
        elastic_backend: base URL of the elasticsearch HTTP endpoint
        elastic_index: name of the elasticsearch index to write documents to
        """
        super().__init__(kafka_hosts=kafka_hosts,
                         consume_topic=consume_topic,
                         api_host_url=None)
        self.consumer_group = "elastic-updates"
        self.elastic_backend = elastic_backend
        self.elastic_index = elastic_index

    def run(self):
        """
        Loops over messages from the consume topic, POSTing each release to
        elasticsearch as a document keyed by the release ident.

        Raises requests.HTTPError if elasticsearch responds with anything
        other than 200 or 201; in that case the offset for the failed message
        is not committed by this method.
        """
        consume_topic = self.kafka.topics[self.consume_topic]

        consumer = consume_topic.get_balanced_consumer(
            consumer_group=self.consumer_group,
            managed=True,
        )

        for msg in consumer:
            json_str = msg.value.decode('utf-8')
            release = entity_from_json(json_str, ReleaseEntity)
            elastic_endpoint = "{}/{}/release/{}".format(
                self.elastic_backend,
                self.elastic_index,
                release.ident)
            print("Updating document: {}".format(elastic_endpoint))
            resp = requests.post(elastic_endpoint, json=release.to_elastic_dict())
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would let a failed index request fall through
            # to the offset commit below and silently drop the update.
            if resp.status_code not in (200, 201):
                raise requests.HTTPError(
                    "Unexpected elasticsearch status {} for {}: {}".format(
                        resp.status_code, elastic_endpoint, resp.text),
                    response=resp)
            # Only commit once the document has been accepted
            consumer.commit_offsets()