aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_transform.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_transform.py')
-rwxr-xr-xpython/fatcat_transform.py34
1 files changed, 32 insertions, 2 deletions
diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py
index ccb13871..23a56109 100755
--- a/python/fatcat_transform.py
+++ b/python/fatcat_transform.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
"""
+Utility script for doing bulk conversion/tranforms of entity JSON schema to
+other formats
"""
import sys
@@ -15,10 +17,11 @@ from citeproc_styles import get_style_filepath
import fatcat_openapi_client
from fatcat_openapi_client.rest import ApiException
-from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry
+from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry
from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
release_to_elasticsearch, container_to_elasticsearch, \
- changelog_to_elasticsearch, public_api, release_to_csl, citeproc_csl
+ file_to_elasticsearch, changelog_to_elasticsearch, public_api, \
+ release_to_csl, citeproc_csl
def run_elasticsearch_releases(args):
@@ -27,6 +30,8 @@ def run_elasticsearch_releases(args):
if not line:
continue
entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
+ if entity.state != 'active':
+ continue
args.json_output.write(
json.dumps(release_to_elasticsearch(entity)) + '\n')
@@ -36,9 +41,22 @@ def run_elasticsearch_containers(args):
if not line:
continue
entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client)
+ if entity.state != 'active':
+ continue
args.json_output.write(
json.dumps(container_to_elasticsearch(entity)) + '\n')
+def run_elasticsearch_files(args):
+ for line in args.json_input:
+ line = line.strip()
+ if not line:
+ continue
+ entity = entity_from_json(line, FileEntity, api_client=args.api.api_client)
+ if entity.state != 'active':
+ continue
+ args.json_output.write(
+ json.dumps(file_to_elasticsearch(entity)) + '\n')
+
def run_elasticsearch_changelogs(args):
for line in args.json_input:
line = line.strip()
@@ -54,6 +72,8 @@ def run_citeproc_releases(args):
if not line:
continue
entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
+ if entity.state != 'active':
+ continue
csl_json = release_to_csl(entity)
csl_json['id'] = "release:" + (entity.ident or "unknown")
out = citeproc_csl(csl_json, args.style, args.html)
@@ -87,6 +107,16 @@ def main():
help="where to send output",
default=sys.stdout, type=argparse.FileType('w'))
+ sub_elasticsearch_files = subparsers.add_parser('elasticsearch-files',
+ help="convert fatcat file JSON schema to elasticsearch file schema")
+ sub_elasticsearch_files.set_defaults(func=run_elasticsearch_files)
+ sub_elasticsearch_files.add_argument('json_input',
+ help="JSON-per-line of file entities",
+ default=sys.stdin, type=argparse.FileType('r'))
+ sub_elasticsearch_files.add_argument('json_output',
+ help="where to send output",
+ default=sys.stdout, type=argparse.FileType('w'))
+
sub_elasticsearch_changelogs = subparsers.add_parser('elasticsearch-changelogs',
help="convert fatcat changelog JSON schema to elasticsearch changelog schema")
sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs)