aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:30:06 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-28 15:30:06 -0800
commit1eeecb90a4ce743d4d83c651b9e8c0128541404a (patch)
treeeb63383e10f17ecf0968a0aab26e3f42b218a534
parentf6f7450903bdbe36bd5fff146b942e34ad221557 (diff)
downloadfatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.tar.gz
fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.zip
fatcat -> fatcat_release ES index
-rwxr-xr-xpython/fatcat_export.py18
-rw-r--r--python/fatcat_tools/transforms.py18
-rw-r--r--python/fatcat_tools/workers/elasticsearch.py5
3 files changed, 21 insertions, 20 deletions
diff --git a/python/fatcat_export.py b/python/fatcat_export.py
index 027d6c0a..33e23202 100755
--- a/python/fatcat_export.py
+++ b/python/fatcat_export.py
@@ -19,19 +19,18 @@ from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
def run_export_releases(args):
- api = args.api
for line in args.ident_file:
ident = uuid2fcid(line.split()[0])
- release = api.get_release(ident=ident, expand="all")
+ release = args.api.get_release(ident=ident, expand="all")
args.json_output.write(
- json.dumps(entity_to_dict(release)) + "\n")
+ json.dumps(entity_to_dict(release), api_client=args.api.api_client) + "\n")
def run_transform_releases(args):
for line in args.json_input:
line = line.strip()
if not line:
continue
- entity = entity_from_json(line, ReleaseEntity)
+ entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
args.json_output.write(
json.dumps(release_to_elasticsearch(entity)) + '\n')
@@ -40,7 +39,7 @@ def run_transform_containers(args):
line = line.strip()
if not line:
continue
- entity = entity_from_json(line, ContainerEntity)
+ entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client)
args.json_output.write(
json.dumps(container_to_elasticsearch(entity)) + '\n')
@@ -49,21 +48,20 @@ def run_transform_changelogs(args):
line = line.strip()
if not line:
continue
- entity = entity_from_json(line, ChangelogEntry)
+ entity = entity_from_json(line, ChangelogEntry, api_client=args.api.api_client)
args.json_output.write(
json.dumps(changelog_to_elasticsearch(entity)) + '\n')
def run_export_changelog(args):
- api = args.api
end = args.end
if end is None:
- latest = api.get_changelog(limit=1)[0]
+ latest = args.api.get_changelog(limit=1)[0]
end = latest.index
for i in range(args.start, end):
- entry = api.get_changelog_entry(index=i)
+ entry = args.api.get_changelog_entry(index=i)
args.json_output.write(
- json.dumps(entity_to_dict(entry)) + "\n")
+ json.dumps(entity_to_dict(entry, api_client=args.api.api_client)) + "\n")
def main():
parser = argparse.ArgumentParser()
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 7bb75c3e..37cbf1d2 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -1,23 +1,25 @@
import collections
-from fatcat_client import ReleaseEntity, ApiClient
+from fatcat_client import ApiClient
-def entity_to_dict(entity):
+def entity_to_dict(entity, api_client=None):
"""
Hack to take advantage of the code-generated serialization code
"""
- ac = ApiClient()
- return ac.sanitize_for_serialization(entity)
+ if not api_client:
+ api_client = ApiClient()
+ return api_client.sanitize_for_serialization(entity)
-def entity_from_json(json_str, entity_type):
+def entity_from_json(json_str, entity_type, api_client=None):
"""
Hack to take advantage of the code-generated deserialization code
"""
- ac = ApiClient()
+ if not api_client:
+ api_client = ApiClient()
thing = collections.namedtuple('Thing', ['data'])
thing.data = json_str
- return ac.deserialize(thing, entity_type)
+ return api_client.deserialize(thing, entity_type)
def check_kbart(year, archive):
if not archive or not archive.get('year_spans'):
@@ -284,7 +286,7 @@ def container_to_elasticsearch(entity):
t['is_oa'] = in_doaj or in_road or is_longtail_oa or is_oa
t['is_longtail_oa'] = is_longtail_oa
t['any_kbart'] = any_ia_sim
- t['any_jstor'] = any_ia_sim
+ t['any_jstor'] = any_jstor
t['any_ia_sim'] = bool(any_ia_sim)
return t
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index 3adb088e..0a49192e 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -4,7 +4,7 @@ import time
import requests
from pykafka.common import OffsetType
-from fatcat_client import ReleaseEntity
+from fatcat_client import ReleaseEntity, ApiClient
from fatcat_tools import *
from .worker_common import FatcatWorker
@@ -28,6 +28,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
def run(self):
consume_topic = self.kafka.topics[self.consume_topic]
+ ac = ApiClient()
consumer = consume_topic.get_balanced_consumer(
consumer_group=self.consumer_group,
@@ -40,7 +41,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
for msg in consumer:
json_str = msg.value.decode('utf-8')
- release = entity_from_json(json_str, ReleaseEntity)
+ release = entity_from_json(json_str, ReleaseEntity, api_client=ac)
#print(release)
elasticsearch_endpoint = "{}/{}/release/{}".format(
self.elasticsearch_backend,