From bb28a3fc1cc900f2dde31e1dbc492d9661034f41 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 15 Nov 2018 13:11:52 -0800
Subject: large refactor of python names/paths

- Add __init__.py files for fatcat_tools submodules, and use them in imports
- Add a bunch of comments to files
- Rename a number of classes and functions to be less verbose
---
 python/fatcat_tools/workers/__init__.py      |  4 +++
 python/fatcat_tools/workers/changelog.py     |  7 ++--
 python/fatcat_tools/workers/elastic.py       | 52 ----------------------------
 python/fatcat_tools/workers/elasticsearch.py | 52 ++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 55 deletions(-)
 create mode 100644 python/fatcat_tools/workers/__init__.py
 delete mode 100644 python/fatcat_tools/workers/elastic.py
 create mode 100644 python/fatcat_tools/workers/elasticsearch.py

diff --git a/python/fatcat_tools/workers/__init__.py b/python/fatcat_tools/workers/__init__.py
new file mode 100644
index 00000000..e8973bc3
--- /dev/null
+++ b/python/fatcat_tools/workers/__init__.py
@@ -0,0 +1,4 @@
+
+from .changelog import ChangelogWorker, EntityUpdatesWorker
+from .elasticsearch import ElasticsearchReleaseWorker
+from .worker_common import most_recent_message, FatcatWorker
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index d07b5988..e64c043b 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -1,11 +1,12 @@
 
 import json
 import time
-from fatcat_tools.workers.worker_common import FatcatWorker, most_recent_message
 from pykafka.common import OffsetType
 
+from .worker_common import FatcatWorker, most_recent_message
 
-class FatcatChangelogWorker(FatcatWorker):
+
+class ChangelogWorker(FatcatWorker):
     """
     Periodically polls the fatcat API looking for new changelogs. When they
     are found, fetch them and push (as JSON) into a Kafka topic.
@@ -52,7 +53,7 @@ class FatcatChangelogWorker(FatcatWorker):
         time.sleep(self.poll_interval)
 
 
-class FatcatEntityUpdatesWorker(FatcatWorker):
+class EntityUpdatesWorker(FatcatWorker):
     """
     Consumes from the changelog topic and publishes expanded entities (fetched
     from API) to update topics.
diff --git a/python/fatcat_tools/workers/elastic.py b/python/fatcat_tools/workers/elastic.py
deleted file mode 100644
index 3a75a1b3..00000000
--- a/python/fatcat_tools/workers/elastic.py
+++ /dev/null
@@ -1,52 +0,0 @@
-
-import json
-import time
-import requests
-from fatcat_tools.transforms import release_elastic_dict
-from fatcat_tools.workers.worker_common import FatcatWorker
-from fatcat_client import ReleaseEntity
-from fatcat_tools.transforms import *
-from pykafka.common import OffsetType
-
-
-class FatcatElasticReleaseWorker(FatcatWorker):
-    """
-    Consumes from release-updates topic and pushes into (presumably local)
-    elasticsearch.
-
-    Uses a consumer group to manage offset.
- """ - - def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None, - elastic_backend="http://localhost:9200", elastic_index="fatcat"): - super().__init__(kafka_hosts=kafka_hosts, - consume_topic=consume_topic, - api_host_url=None) - self.consumer_group = "elastic-updates" - self.elastic_backend = elastic_backend - self.elastic_index = elastic_index - - def run(self): - consume_topic = self.kafka.topics[self.consume_topic] - - consumer = consume_topic.get_balanced_consumer( - consumer_group=self.consumer_group, - managed=True, - fetch_message_max_bytes=4000000, # up to ~4MBytes - auto_commit_enable=True, - auto_commit_interval_ms=30000, # 30 seconds - compacted_topic=True, - ) - - for msg in consumer: - json_str = msg.value.decode('utf-8') - release = entity_from_json(json_str, ReleaseEntity) - #print(release) - elastic_endpoint = "{}/{}/release/{}".format( - self.elastic_backend, - self.elastic_index, - release.ident) - print("Updating document: {}".format(elastic_endpoint)) - resp = requests.post(elastic_endpoint, json=release_elastic_dict(release)) - assert resp.status_code in (200, 201) - #consumer.commit_offsets() diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py new file mode 100644 index 00000000..e7abd5ee --- /dev/null +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -0,0 +1,52 @@ + +import json +import time +import requests +from pykafka.common import OffsetType + +from fatcat_client import ReleaseEntity +from fatcat_tools import * +from .worker_common import FatcatWorker + + +class ElasticsearchReleaseWorker(FatcatWorker): + """ + Consumes from release-updates topic and pushes into (presumably local) + elasticsearch. + + Uses a consumer group to manage offset. + """ + + def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None, + elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat"): + super().__init__(kafka_hosts=kafka_hosts, + consume_topic=consume_topic, + api_host_url=None) + self.consumer_group = "elasticsearch-updates" + self.elasticsearch_backend = elasticsearch_backend + self.elasticsearch_index = elasticsearch_index + + def run(self): + consume_topic = self.kafka.topics[self.consume_topic] + + consumer = consume_topic.get_balanced_consumer( + consumer_group=self.consumer_group, + managed=True, + fetch_message_max_bytes=4000000, # up to ~4MBytes + auto_commit_enable=True, + auto_commit_interval_ms=30000, # 30 seconds + compacted_topic=True, + ) + + for msg in consumer: + json_str = msg.value.decode('utf-8') + release = entity_from_json(json_str, ReleaseEntity) + #print(release) + elasticsearch_endpoint = "{}/{}/release/{}".format( + self.elasticsearch_backend, + self.elasticsearch_index, + release.ident) + print("Updating document: {}".format(elasticsearch_endpoint)) + resp = requests.post(elasticsearch_endpoint, json=release_to_elasticsearch(release)) + assert resp.status_code in (200, 201) + #consumer.commit_offsets() -- cgit v1.2.3