From 5d458a3df7e58e6551d8ec72979e376c62fdd2f7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 29 Jan 2020 21:52:33 -0800 Subject: fix some transform bugs, add some tests --- python/fatcat_transform.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'python/fatcat_transform.py') diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index ccb13871..42d2ea99 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """ +Utility script for doing bulk conversion/transforms of entity JSON schema to +other formats """ import sys @@ -15,10 +17,11 @@ from citeproc_styles import get_style_filepath import fatcat_openapi_client from fatcat_openapi_client.rest import ApiException -from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry +from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \ release_to_elasticsearch, container_to_elasticsearch, \ - changelog_to_elasticsearch, public_api, release_to_csl, citeproc_csl + file_to_elasticsearch, changelog_to_elasticsearch, public_api, \ + release_to_csl, citeproc_csl def run_elasticsearch_releases(args): @@ -39,6 +42,15 @@ def run_elasticsearch_containers(args): args.json_output.write( json.dumps(container_to_elasticsearch(entity)) + '\n') +def run_elasticsearch_files(args): + for line in args.json_input: + line = line.strip() + if not line: + continue + entity = entity_from_json(line, FileEntity, api_client=args.api.api_client) + args.json_output.write( + json.dumps(file_to_elasticsearch(entity)) + '\n') + def run_elasticsearch_changelogs(args): for line in args.json_input: line = line.strip() @@ -87,6 +99,16 @@ def main(): help="where to send output", default=sys.stdout, type=argparse.FileType('w')) + sub_elasticsearch_files = subparsers.add_parser('elasticsearch-files', 
help="convert fatcat file JSON schema to elasticsearch file schema") + sub_elasticsearch_files.set_defaults(func=run_elasticsearch_files) + sub_elasticsearch_files.add_argument('json_input', + help="JSON-per-line of file entities", + default=sys.stdin, type=argparse.FileType('r')) + sub_elasticsearch_files.add_argument('json_output', + help="where to send output", + default=sys.stdout, type=argparse.FileType('w')) + sub_elasticsearch_changelogs = subparsers.add_parser('elasticsearch-changelogs', help="convert fatcat changelog JSON schema to elasticsearch changelog schema") sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs) -- cgit v1.2.3 From c57a743cb8b774750c99c6f079438666a87f6476 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 26 Feb 2020 11:38:51 -0800 Subject: bulk ES transform: skip non-active entities --- python/fatcat_transform.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'python/fatcat_transform.py') diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index 42d2ea99..9ddbaa4d 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -30,6 +30,8 @@ def run_elasticsearch_releases(args): if not line: continue entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) + if entity['state'] != 'active': + continue args.json_output.write( json.dumps(release_to_elasticsearch(entity)) + '\n') @@ -39,6 +41,8 @@ def run_elasticsearch_containers(args): if not line: continue entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client) + if entity['state'] != 'active': + continue args.json_output.write( json.dumps(container_to_elasticsearch(entity)) + '\n') @@ -48,6 +52,8 @@ def run_elasticsearch_files(args): if not line: continue entity = entity_from_json(line, FileEntity, api_client=args.api.api_client) + if entity['state'] != 'active': + continue args.json_output.write( json.dumps(file_to_elasticsearch(entity)) + '\n') @@ -66,6 +72,8 @@ def 
run_citeproc_releases(args): if not line: continue entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) + if entity['state'] != 'active': + continue csl_json = release_to_csl(entity) csl_json['id'] = "release:" + (entity.ident or "unknown") out = citeproc_csl(csl_json, args.style, args.html) -- cgit v1.2.3 From 21239503ddd71c69ddf651260f2953c93f227dfc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 26 Feb 2020 12:16:22 -0800 Subject: fix fatcat_transform state filters --- python/fatcat_transform.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'python/fatcat_transform.py') diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index 9ddbaa4d..23a56109 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -30,7 +30,7 @@ def run_elasticsearch_releases(args): if not line: continue entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) - if entity['state'] != 'active': + if entity.state != 'active': continue args.json_output.write( json.dumps(release_to_elasticsearch(entity)) + '\n') @@ -41,7 +41,7 @@ def run_elasticsearch_containers(args): if not line: continue entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client) - if entity['state'] != 'active': + if entity.state != 'active': continue args.json_output.write( json.dumps(container_to_elasticsearch(entity)) + '\n') @@ -52,7 +52,7 @@ def run_elasticsearch_files(args): if not line: continue entity = entity_from_json(line, FileEntity, api_client=args.api.api_client) - if entity['state'] != 'active': + if entity.state != 'active': continue args.json_output.write( json.dumps(file_to_elasticsearch(entity)) + '\n') @@ -72,7 +72,7 @@ def run_citeproc_releases(args): if not line: continue entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) - if entity['state'] != 'active': + if entity.state != 'active': continue csl_json = release_to_csl(entity) 
csl_json['id'] = "release:" + (entity.ident or "unknown") -- cgit v1.2.3