From b0c5db8a2bd2e389f99df1b44120c18fa5bc3e52 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 6 Apr 2021 20:27:05 -0700 Subject: transform tool: container transform stats lookup support --- python/fatcat_transform.py | 28 ++++++++++++++++++++-- .../container_jxqqgho7bncrvgfyfznramju3q.json | 1 + 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index 8e01c860..93c39e2f 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -9,11 +9,14 @@ import sys import json import argparse +import elasticsearch from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry + from fatcat_tools import entity_from_json, \ release_to_elasticsearch, container_to_elasticsearch, \ file_to_elasticsearch, changelog_to_elasticsearch, public_api, \ release_to_csl, citeproc_csl +from fatcat_web.search import get_elastic_container_stats def run_elasticsearch_releases(args): @@ -28,6 +31,8 @@ def run_elasticsearch_releases(args): json.dumps(release_to_elasticsearch(entity)) + '\n') def run_elasticsearch_containers(args): + es_client = elasticsearch.Elasticsearch(args.fatcat_elasticsearch_url) + es_release_index = "fatcat_release" for line in args.json_input: line = line.strip() if not line: @@ -35,8 +40,21 @@ def run_elasticsearch_containers(args): entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client) if entity.state != 'active': continue - args.json_output.write( - json.dumps(container_to_elasticsearch(entity)) + '\n') + + if args.query_stats: + es_doc = container_to_elasticsearch( + entity, + stats=get_elastic_container_stats( + entity.ident, + es_client=es_client, + es_index=es_release_index, + merge_shadows=True, + ), + ) + else: + es_doc = container_to_elasticsearch(entity) + + args.json_output.write(json.dumps(es_doc) + '\n') def run_elasticsearch_files(args): for line in args.json_input: @@ -77,6 +95,9 @@ def main(): parser.add_argument('--fatcat-api-url', default="http://localhost:9411/v0", help="connect to this host/port") + parser.add_argument('--fatcat-elasticsearch-url', + default="http://localhost:9200", + help="connect to this host/port") subparsers = parser.add_subparsers() sub_elasticsearch_releases = subparsers.add_parser('elasticsearch-releases', @@ -98,6 +119,9 @@ def main(): sub_elasticsearch_containers.add_argument('json_output', help="where to send output", default=sys.stdout, type=argparse.FileType('w')) + sub_elasticsearch_containers.add_argument('--query-stats', + action='store_true', + help="whether to query release search index for container stats") sub_elasticsearch_files = subparsers.add_parser('elasticsearch-files', help="convert fatcat file JSON schema to elasticsearch file schema") diff --git a/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json b/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json new file mode 100644 index 00000000..bb4d46f9 --- /dev/null +++ b/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json @@ -0,0 +1 @@ +{"extra":{"abbrev":"Annu. Rev. Pharmacol. Toxicol.","country":"us","ezb":{"color":"red","ezb_id":"2460"},"ia":{"sim":{"peer_reviewed":true,"pub_type":"Scholarly Journals","scholarly_peer_reviewed":true,"sim_pubid":"5091","year_spans":[[1961,2009]]}},"issne":"1545-4304","issnp":"0362-1642","kbart":{"hathitrust":{"year_spans":[[1976,1992]]},"portico":{"year_spans":[[1961,1999],[2001,2001],[2003,2003],[2005,2006],[2008,2010],[2012,2019]]},"scholarsportal":{"year_spans":[[1961,2003],[2005,2019]]}},"languages":["en"],"sherpa_romeo":{"color":"yellow"},"urls":["https://www.annualreviews.org/journal/pharmtox","https://www.annualreviews.org/loi/pharmtox","http://arjournals.annualreviews.org/loi/pharmtox"]},"ident":"jxqqgho7bncrvgfyfznramju3q","issnl":"0362-1642","name":"Annual Review of Pharmacology and Toxicology","publisher":"Annual Reviews","revision":"ff56081b-9130-47a6-9e14-9901c2808502","state":"active"} -- cgit v1.2.3