summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:30:06 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:30:06 -0800
commit1eeecb90a4ce743d4d83c651b9e8c0128541404a (patch)
treeeb63383e10f17ecf0968a0aab26e3f42b218a534 /python/fatcat_tools
parentf6f7450903bdbe36bd5fff146b942e34ad221557 (diff)
downloadfatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.tar.gz
fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.zip
fatcat -> fatcat_release ES index
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/transforms.py18
-rw-r--r--python/fatcat_tools/workers/elasticsearch.py5
2 files changed, 13 insertions, 10 deletions
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 7bb75c3e..37cbf1d2 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -1,23 +1,25 @@
import collections
-from fatcat_client import ReleaseEntity, ApiClient
+from fatcat_client import ApiClient
-def entity_to_dict(entity):
+def entity_to_dict(entity, api_client=None):
"""
Hack to take advantage of the code-generated serialization code
"""
- ac = ApiClient()
- return ac.sanitize_for_serialization(entity)
+ if not api_client:
+ api_client = ApiClient()
+ return api_client.sanitize_for_serialization(entity)
-def entity_from_json(json_str, entity_type):
+def entity_from_json(json_str, entity_type, api_client=None):
"""
Hack to take advantage of the code-generated deserialization code
"""
- ac = ApiClient()
+ if not api_client:
+ api_client = ApiClient()
thing = collections.namedtuple('Thing', ['data'])
thing.data = json_str
- return ac.deserialize(thing, entity_type)
+ return api_client.deserialize(thing, entity_type)
def check_kbart(year, archive):
if not archive or not archive.get('year_spans'):
@@ -284,7 +286,7 @@ def container_to_elasticsearch(entity):
t['is_oa'] = in_doaj or in_road or is_longtail_oa or is_oa
t['is_longtail_oa'] = is_longtail_oa
t['any_kbart'] = any_ia_sim
- t['any_jstor'] = any_ia_sim
+ t['any_jstor'] = any_jstor
t['any_ia_sim'] = bool(any_ia_sim)
return t
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index 3adb088e..0a49192e 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -4,7 +4,7 @@ import time
import requests
from pykafka.common import OffsetType
-from fatcat_client import ReleaseEntity
+from fatcat_client import ReleaseEntity, ApiClient
from fatcat_tools import *
from .worker_common import FatcatWorker
@@ -28,6 +28,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
def run(self):
consume_topic = self.kafka.topics[self.consume_topic]
+ ac = ApiClient()
consumer = consume_topic.get_balanced_consumer(
consumer_group=self.consumer_group,
@@ -40,7 +41,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
for msg in consumer:
json_str = msg.value.decode('utf-8')
- release = entity_from_json(json_str, ReleaseEntity)
+ release = entity_from_json(json_str, ReleaseEntity, api_client=ac)
#print(release)
elasticsearch_endpoint = "{}/{}/release/{}".format(
self.elasticsearch_backend,