diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:30:06 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 15:30:06 -0800 | 
| commit | 1eeecb90a4ce743d4d83c651b9e8c0128541404a (patch) | |
| tree | eb63383e10f17ecf0968a0aab26e3f42b218a534 /python | |
| parent | f6f7450903bdbe36bd5fff146b942e34ad221557 (diff) | |
| download | fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.tar.gz fatcat-1eeecb90a4ce743d4d83c651b9e8c0128541404a.zip | |
fatcat -> fatcat_release ES index
Diffstat (limited to 'python')
| -rwxr-xr-x | python/fatcat_export.py | 18 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms.py | 18 | ||||
| -rw-r--r-- | python/fatcat_tools/workers/elasticsearch.py | 5 | 
3 files changed, 21 insertions, 20 deletions
| diff --git a/python/fatcat_export.py b/python/fatcat_export.py index 027d6c0a..33e23202 100755 --- a/python/fatcat_export.py +++ b/python/fatcat_export.py @@ -19,19 +19,18 @@ from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \  def run_export_releases(args): -    api = args.api      for line in args.ident_file:          ident = uuid2fcid(line.split()[0]) -        release = api.get_release(ident=ident, expand="all") +        release = args.api.get_release(ident=ident, expand="all")          args.json_output.write( -            json.dumps(entity_to_dict(release)) + "\n") +            json.dumps(entity_to_dict(release), api_client=args.api.api_client) + "\n")  def run_transform_releases(args):      for line in args.json_input:          line = line.strip()          if not line:              continue -        entity = entity_from_json(line, ReleaseEntity) +        entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)          args.json_output.write(              json.dumps(release_to_elasticsearch(entity)) + '\n') @@ -40,7 +39,7 @@ def run_transform_containers(args):          line = line.strip()          if not line:              continue -        entity = entity_from_json(line, ContainerEntity) +        entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client)          args.json_output.write(              json.dumps(container_to_elasticsearch(entity)) + '\n') @@ -49,21 +48,20 @@ def run_transform_changelogs(args):          line = line.strip()          if not line:              continue -        entity = entity_from_json(line, ChangelogEntry) +        entity = entity_from_json(line, ChangelogEntry, api_client=args.api.api_client)          args.json_output.write(              json.dumps(changelog_to_elasticsearch(entity)) + '\n')  def run_export_changelog(args): -    api = args.api      end = args.end      if end is None: -        latest = api.get_changelog(limit=1)[0] +        latest = args.api.get_changelog(limit=1)[0]          end = latest.index      for i in range(args.start, end): -        entry = api.get_changelog_entry(index=i) +        entry = args.api.get_changelog_entry(index=i)          args.json_output.write( -            json.dumps(entity_to_dict(entry)) + "\n") +            json.dumps(entity_to_dict(entry, api_client=args.api.api_client)) + "\n")  def main():      parser = argparse.ArgumentParser() diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py index 7bb75c3e..37cbf1d2 100644 --- a/python/fatcat_tools/transforms.py +++ b/python/fatcat_tools/transforms.py @@ -1,23 +1,25 @@  import collections -from fatcat_client import ReleaseEntity, ApiClient +from fatcat_client import ApiClient -def entity_to_dict(entity): +def entity_to_dict(entity, api_client=None):      """      Hack to take advantage of the code-generated serialization code      """ -    ac = ApiClient() -    return ac.sanitize_for_serialization(entity) +    if not api_client: +        api_client = ApiClient() +    return api_client.sanitize_for_serialization(entity) -def entity_from_json(json_str, entity_type): +def entity_from_json(json_str, entity_type, api_client=None):      """      Hack to take advantage of the code-generated deserialization code      """ -    ac = ApiClient() +    if not api_client: +        api_client = ApiClient()      thing = collections.namedtuple('Thing', ['data'])      thing.data = json_str -    return ac.deserialize(thing, entity_type) +    return api_client.deserialize(thing, entity_type)  def check_kbart(year, archive):      if not archive or not archive.get('year_spans'): @@ -284,7 +286,7 @@ def container_to_elasticsearch(entity):      t['is_oa'] = in_doaj or in_road or is_longtail_oa or is_oa      t['is_longtail_oa'] = is_longtail_oa      t['any_kbart'] = any_ia_sim -    t['any_jstor'] = any_ia_sim +    t['any_jstor'] = any_jstor      t['any_ia_sim'] = bool(any_ia_sim)      return t diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index 3adb088e..0a49192e 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -4,7 +4,7 @@ import time  import requests  from pykafka.common import OffsetType -from fatcat_client import ReleaseEntity +from fatcat_client import ReleaseEntity, ApiClient  from fatcat_tools import *  from .worker_common import FatcatWorker @@ -28,6 +28,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):      def run(self):          consume_topic = self.kafka.topics[self.consume_topic] +        ac = ApiClient()          consumer = consume_topic.get_balanced_consumer(              consumer_group=self.consumer_group, @@ -40,7 +41,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):          for msg in consumer:              json_str = msg.value.decode('utf-8') -            release = entity_from_json(json_str, ReleaseEntity) +            release = entity_from_json(json_str, ReleaseEntity, api_client=ac)              #print(release)              elasticsearch_endpoint = "{}/{}/release/{}".format(                  self.elasticsearch_backend, | 
