summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-04-06 20:27:05 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-04-06 21:58:54 -0700
commit: b0c5db8a2bd2e389f99df1b44120c18fa5bc3e52 (patch)
tree: 1555781d10359ce3e4ccbe57047a60c6b4fbae26 /python
parent: 5ba1859a2308c72e7e3e4e86af3c275210c5bacd (diff)
download: fatcat-b0c5db8a2bd2e389f99df1b44120c18fa5bc3e52.tar.gz
fatcat-b0c5db8a2bd2e389f99df1b44120c18fa5bc3e52.zip
transform tool: container transform stats lookup support
Diffstat (limited to 'python')
-rwxr-xr-x python/fatcat_transform.py 28
-rw-r--r-- python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json 1
2 files changed, 27 insertions, 2 deletions
diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py
index 8e01c860..93c39e2f 100755
--- a/python/fatcat_transform.py
+++ b/python/fatcat_transform.py
@@ -9,11 +9,14 @@ import sys
import json
import argparse
+import elasticsearch
from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry
+
from fatcat_tools import entity_from_json, \
release_to_elasticsearch, container_to_elasticsearch, \
file_to_elasticsearch, changelog_to_elasticsearch, public_api, \
release_to_csl, citeproc_csl
+from fatcat_web.search import get_elastic_container_stats
def run_elasticsearch_releases(args):
@@ -28,6 +31,8 @@ def run_elasticsearch_releases(args):
json.dumps(release_to_elasticsearch(entity)) + '\n')
def run_elasticsearch_containers(args):
+ es_client = elasticsearch.Elasticsearch(args.fatcat_elasticsearch_url)
+ es_release_index = "fatcat_release"
for line in args.json_input:
line = line.strip()
if not line:
@@ -35,8 +40,21 @@ def run_elasticsearch_containers(args):
entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client)
if entity.state != 'active':
continue
- args.json_output.write(
- json.dumps(container_to_elasticsearch(entity)) + '\n')
+
+ if args.query_stats:
+ es_doc = container_to_elasticsearch(
+ entity,
+ stats=get_elastic_container_stats(
+ entity.ident,
+ es_client=es_client,
+ es_index=es_release_index,
+ merge_shadows=True,
+ ),
+ )
+ else:
+ es_doc = container_to_elasticsearch(entity)
+
+ args.json_output.write(json.dumps(es_doc) + '\n')
def run_elasticsearch_files(args):
for line in args.json_input:
@@ -77,6 +95,9 @@ def main():
parser.add_argument('--fatcat-api-url',
default="http://localhost:9411/v0",
help="connect to this host/port")
+ parser.add_argument('--fatcat-elasticsearch-url',
+ default="http://localhost:9200",
+ help="connect to this host/port")
subparsers = parser.add_subparsers()
sub_elasticsearch_releases = subparsers.add_parser('elasticsearch-releases',
@@ -98,6 +119,9 @@ def main():
sub_elasticsearch_containers.add_argument('json_output',
help="where to send output",
default=sys.stdout, type=argparse.FileType('w'))
+ sub_elasticsearch_containers.add_argument('--query-stats',
+ action='store_true',
+ help="whether to query release search index for container stats")
sub_elasticsearch_files = subparsers.add_parser('elasticsearch-files',
help="convert fatcat file JSON schema to elasticsearch file schema")
diff --git a/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json b/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json
new file mode 100644
index 00000000..bb4d46f9
--- /dev/null
+++ b/python/tests/files/container_jxqqgho7bncrvgfyfznramju3q.json
@@ -0,0 +1 @@
+{"extra":{"abbrev":"Annu. Rev. Pharmacol. Toxicol.","country":"us","ezb":{"color":"red","ezb_id":"2460"},"ia":{"sim":{"peer_reviewed":true,"pub_type":"Scholarly Journals","scholarly_peer_reviewed":true,"sim_pubid":"5091","year_spans":[[1961,2009]]}},"issne":"1545-4304","issnp":"0362-1642","kbart":{"hathitrust":{"year_spans":[[1976,1992]]},"portico":{"year_spans":[[1961,1999],[2001,2001],[2003,2003],[2005,2006],[2008,2010],[2012,2019]]},"scholarsportal":{"year_spans":[[1961,2003],[2005,2019]]}},"languages":["en"],"sherpa_romeo":{"color":"yellow"},"urls":["https://www.annualreviews.org/journal/pharmtox","https://www.annualreviews.org/loi/pharmtox","http://arjournals.annualreviews.org/loi/pharmtox"]},"ident":"jxqqgho7bncrvgfyfznramju3q","issnl":"0362-1642","name":"Annual Review of Pharmacology and Toxicology","publisher":"Annual Reviews","revision":"ff56081b-9130-47a6-9e14-9901c2808502","state":"active"}