diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:14:09 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:14:09 -0700 | 
| commit | 6464631dbe5c4afeb76f2f3c9d63b89f917c9a3b (patch) | |
| tree | 633303839cafc7d901cf8565e034542606a5bb27 /python/fatcat_transform.py | |
| parent | cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (diff) | |
| download | fatcat-6464631dbe5c4afeb76f2f3c9d63b89f917c9a3b.tar.gz fatcat-6464631dbe5c4afeb76f2f3c9d63b89f917c9a3b.zip | |
fmt (black): *.py
Diffstat (limited to 'python/fatcat_transform.py')
| -rwxr-xr-x | python/fatcat_transform.py | 172 | 
1 file changed, 109 insertions, 63 deletions
| diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index ab855dbf..fe2e12a6 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -31,10 +31,10 @@ def run_elasticsearch_releases(args):          if not line:              continue          entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) -        if entity.state != 'active': +        if entity.state != "active":              continue -        args.json_output.write( -            json.dumps(release_to_elasticsearch(entity)) + '\n') +        args.json_output.write(json.dumps(release_to_elasticsearch(entity)) + "\n") +  def run_elasticsearch_containers(args):      es_client = elasticsearch.Elasticsearch(args.fatcat_elasticsearch_url) @@ -44,7 +44,7 @@ def run_elasticsearch_containers(args):          if not line:              continue          entity = entity_from_json(line, ContainerEntity, api_client=args.api.api_client) -        if entity.state != 'active': +        if entity.state != "active":              continue          if args.query_stats: @@ -60,7 +60,8 @@ def run_elasticsearch_containers(args):          else:              es_doc = container_to_elasticsearch(entity) -        args.json_output.write(json.dumps(es_doc) + '\n') +        args.json_output.write(json.dumps(es_doc) + "\n") +  def run_elasticsearch_files(args):      for line in args.json_input: @@ -68,10 +69,10 @@ def run_elasticsearch_files(args):          if not line:              continue          entity = entity_from_json(line, FileEntity, api_client=args.api.api_client) -        if entity.state != 'active': +        if entity.state != "active":              continue -        args.json_output.write( -            json.dumps(file_to_elasticsearch(entity)) + '\n') +        args.json_output.write(json.dumps(file_to_elasticsearch(entity)) + "\n") +  def run_elasticsearch_changelogs(args):      for line in args.json_input: @@ -79,8 +80,8 @@ def run_elasticsearch_changelogs(args):      
    if not line:              continue          entity = entity_from_json(line, ChangelogEntry, api_client=args.api.api_client) -        args.json_output.write( -            json.dumps(changelog_to_elasticsearch(entity)) + '\n') +        args.json_output.write(json.dumps(changelog_to_elasticsearch(entity)) + "\n") +  def run_citeproc_releases(args):      for line in args.json_input: @@ -88,82 +89,126 @@ def run_citeproc_releases(args):          if not line:              continue          entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client) -        if entity.state != 'active': +        if entity.state != "active":              continue          csl_json = release_to_csl(entity) -        csl_json['id'] = "release:" + (entity.ident or "unknown") +        csl_json["id"] = "release:" + (entity.ident or "unknown")          out = citeproc_csl(csl_json, args.style, args.html)          args.json_output.write(out + "\n") +  def main(): -    parser = argparse.ArgumentParser( -        formatter_class=argparse.ArgumentDefaultsHelpFormatter) -    parser.add_argument('--fatcat-api-url', -        default="http://localhost:9411/v0", -        help="connect to this host/port") -    parser.add_argument('--fatcat-elasticsearch-url', +    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +    parser.add_argument( +        "--fatcat-api-url", default="http://localhost:9411/v0", help="connect to this host/port" +    ) +    parser.add_argument( +        "--fatcat-elasticsearch-url",          default="http://localhost:9200", -        help="connect to this host/port") +        help="connect to this host/port", +    )      subparsers = parser.add_subparsers() -    sub_elasticsearch_releases = subparsers.add_parser('elasticsearch-releases', -        help="convert fatcat release JSON schema to elasticsearch release schema") +    sub_elasticsearch_releases = subparsers.add_parser( +        "elasticsearch-releases", +        
help="convert fatcat release JSON schema to elasticsearch release schema", +    )      sub_elasticsearch_releases.set_defaults(func=run_elasticsearch_releases) -    sub_elasticsearch_releases.add_argument('json_input', +    sub_elasticsearch_releases.add_argument( +        "json_input",          help="JSON-per-line of release entities", -        default=sys.stdin, type=argparse.FileType('r')) -    sub_elasticsearch_releases.add_argument('json_output', +        default=sys.stdin, +        type=argparse.FileType("r"), +    ) +    sub_elasticsearch_releases.add_argument( +        "json_output",          help="where to send output", -        default=sys.stdout, type=argparse.FileType('w')) - -    sub_elasticsearch_containers = subparsers.add_parser('elasticsearch-containers', -        help="convert fatcat container JSON schema to elasticsearch container schema") +        default=sys.stdout, +        type=argparse.FileType("w"), +    ) + +    sub_elasticsearch_containers = subparsers.add_parser( +        "elasticsearch-containers", +        help="convert fatcat container JSON schema to elasticsearch container schema", +    )      sub_elasticsearch_containers.set_defaults(func=run_elasticsearch_containers) -    sub_elasticsearch_containers.add_argument('json_input', +    sub_elasticsearch_containers.add_argument( +        "json_input",          help="JSON-per-line of container entities", -        default=sys.stdin, type=argparse.FileType('r')) -    sub_elasticsearch_containers.add_argument('json_output', +        default=sys.stdin, +        type=argparse.FileType("r"), +    ) +    sub_elasticsearch_containers.add_argument( +        "json_output",          help="where to send output", -        default=sys.stdout, type=argparse.FileType('w')) -    sub_elasticsearch_containers.add_argument('--query-stats', -        action='store_true', -        help="whether to query release search index for container stats") - -    sub_elasticsearch_files = 
subparsers.add_parser('elasticsearch-files', -        help="convert fatcat file JSON schema to elasticsearch file schema") +        default=sys.stdout, +        type=argparse.FileType("w"), +    ) +    sub_elasticsearch_containers.add_argument( +        "--query-stats", +        action="store_true", +        help="whether to query release search index for container stats", +    ) + +    sub_elasticsearch_files = subparsers.add_parser( +        "elasticsearch-files", +        help="convert fatcat file JSON schema to elasticsearch file schema", +    )      sub_elasticsearch_files.set_defaults(func=run_elasticsearch_files) -    sub_elasticsearch_files.add_argument('json_input', +    sub_elasticsearch_files.add_argument( +        "json_input",          help="JSON-per-line of file entities", -        default=sys.stdin, type=argparse.FileType('r')) -    sub_elasticsearch_files.add_argument('json_output', +        default=sys.stdin, +        type=argparse.FileType("r"), +    ) +    sub_elasticsearch_files.add_argument( +        "json_output",          help="where to send output", -        default=sys.stdout, type=argparse.FileType('w')) - -    sub_elasticsearch_changelogs = subparsers.add_parser('elasticsearch-changelogs', -        help="convert fatcat changelog JSON schema to elasticsearch changelog schema") +        default=sys.stdout, +        type=argparse.FileType("w"), +    ) + +    sub_elasticsearch_changelogs = subparsers.add_parser( +        "elasticsearch-changelogs", +        help="convert fatcat changelog JSON schema to elasticsearch changelog schema", +    )      sub_elasticsearch_changelogs.set_defaults(func=run_elasticsearch_changelogs) -    sub_elasticsearch_changelogs.add_argument('json_input', +    sub_elasticsearch_changelogs.add_argument( +        "json_input",          help="JSON-per-line of changelog entries", -        default=sys.stdin, type=argparse.FileType('r')) -    sub_elasticsearch_changelogs.add_argument('json_output', +        
default=sys.stdin, +        type=argparse.FileType("r"), +    ) +    sub_elasticsearch_changelogs.add_argument( +        "json_output",          help="where to send output", -        default=sys.stdout, type=argparse.FileType('w')) - -    sub_citeproc_releases = subparsers.add_parser('citeproc-releases', -        help="convert fatcat release schema to any standard citation format using citeproc/CSL") +        default=sys.stdout, +        type=argparse.FileType("w"), +    ) + +    sub_citeproc_releases = subparsers.add_parser( +        "citeproc-releases", +        help="convert fatcat release schema to any standard citation format using citeproc/CSL", +    )      sub_citeproc_releases.set_defaults(func=run_citeproc_releases) -    sub_citeproc_releases.add_argument('json_input', +    sub_citeproc_releases.add_argument( +        "json_input",          help="JSON-per-line of release entities", -        default=sys.stdin, type=argparse.FileType('r')) -    sub_citeproc_releases.add_argument('json_output', +        default=sys.stdin, +        type=argparse.FileType("r"), +    ) +    sub_citeproc_releases.add_argument( +        "json_output",          help="where to send output", -        default=sys.stdout, type=argparse.FileType('w')) -    sub_citeproc_releases.add_argument('--style', -        help="citation style to output", -        default='csl-json') -    sub_citeproc_releases.add_argument('--html', -        action='store_true', -        help="output HTML, not plain text") +        default=sys.stdout, +        type=argparse.FileType("w"), +    ) +    sub_citeproc_releases.add_argument( +        "--style", help="citation style to output", default="csl-json" +    ) +    sub_citeproc_releases.add_argument( +        "--html", action="store_true", help="output HTML, not plain text" +    )      args = parser.parse_args()      if not args.__dict__.get("func"): @@ -173,5 +218,6 @@ def main():      args.api = public_api(args.fatcat_api_url)      args.func(args) -if __name__ 
== '__main__': + +if __name__ == "__main__":      main() | 
