diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:30:06 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:30:06 -0800 |
commit | 1eeecb90a4ce743d4d83c651b9e8c0128541404a (patch) | |
tree | eb63383e10f17ecf0968a0aab26e3f42b218a534 /python/fatcat_tools | |
parent | f6f7450903bdbe36bd5fff146b942e34ad221557 (diff) | |
download | fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.tar.gz fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.zip |
fatcat -> fatcat_release ES index
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/transforms.py | 18 | ||||
-rw-r--r-- | python/fatcat_tools/workers/elasticsearch.py | 5 |
2 files changed, 13 insertions, 10 deletions
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py index 7bb75c3e..37cbf1d2 100644 --- a/python/fatcat_tools/transforms.py +++ b/python/fatcat_tools/transforms.py @@ -1,23 +1,25 @@ import collections -from fatcat_client import ReleaseEntity, ApiClient +from fatcat_client import ApiClient -def entity_to_dict(entity): +def entity_to_dict(entity, api_client=None): """ Hack to take advantage of the code-generated serialization code """ - ac = ApiClient() - return ac.sanitize_for_serialization(entity) + if not api_client: + api_client = ApiClient() + return api_client.sanitize_for_serialization(entity) -def entity_from_json(json_str, entity_type): +def entity_from_json(json_str, entity_type, api_client=None): """ Hack to take advantage of the code-generated deserialization code """ - ac = ApiClient() + if not api_client: + api_client = ApiClient() thing = collections.namedtuple('Thing', ['data']) thing.data = json_str - return ac.deserialize(thing, entity_type) + return api_client.deserialize(thing, entity_type) def check_kbart(year, archive): if not archive or not archive.get('year_spans'): @@ -284,7 +286,7 @@ def container_to_elasticsearch(entity): t['is_oa'] = in_doaj or in_road or is_longtail_oa or is_oa t['is_longtail_oa'] = is_longtail_oa t['any_kbart'] = any_ia_sim - t['any_jstor'] = any_ia_sim + t['any_jstor'] = any_jstor t['any_ia_sim'] = bool(any_ia_sim) return t diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index 3adb088e..0a49192e 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -4,7 +4,7 @@ import time import requests from pykafka.common import OffsetType -from fatcat_client import ReleaseEntity +from fatcat_client import ReleaseEntity, ApiClient from fatcat_tools import * from .worker_common import FatcatWorker @@ -28,6 +28,7 @@ class ElasticsearchReleaseWorker(FatcatWorker): def run(self): consume_topic = self.kafka.topics[self.consume_topic] + ac = ApiClient() consumer = consume_topic.get_balanced_consumer( consumer_group=self.consumer_group, @@ -40,7 +41,7 @@ class ElasticsearchReleaseWorker(FatcatWorker): for msg in consumer: json_str = msg.value.decode('utf-8') - release = entity_from_json(json_str, ReleaseEntity) + release = entity_from_json(json_str, ReleaseEntity, api_client=ac) #print(release) elasticsearch_endpoint = "{}/{}/release/{}".format( self.elasticsearch_backend, |