From 826c7538e091fac14d987a3cd654975da964e240 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 18:50:17 -0700 Subject: make fmt (black 21.9b0) --- python/ingest_tool.py | 85 +++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 36 deletions(-) (limited to 'python/ingest_tool.py') diff --git a/python/ingest_tool.py b/python/ingest_tool.py index eb1047d..7405d28 100755 --- a/python/ingest_tool.py +++ b/python/ingest_tool.py @@ -17,9 +17,9 @@ def run_single_ingest(args): fatcat=dict(release_ident=args.release_id), ) if args.force_recrawl: - request['force_recrawl'] = True - if request['ingest_type'] in [ - 'dataset', + request["force_recrawl"] = True + if request["ingest_type"] in [ + "dataset", ]: ingester = IngestFilesetWorker( try_spn2=not args.no_spn2, @@ -41,11 +41,13 @@ def run_requests(args): try_spn2=not args.no_spn2, html_quick_mode=args.html_quick_mode, ) - fileset_worker = IngestFilesetWorker(try_spn2=not args.no_spn2, ) + fileset_worker = IngestFilesetWorker( + try_spn2=not args.no_spn2, + ) for line in args.json_file: request = json.loads(line.strip()) - if request['ingest_type'] in [ - 'dataset', + if request["ingest_type"] in [ + "dataset", ]: result = fileset_worker.process(request) else: @@ -56,7 +58,7 @@ def run_requests(args): def run_api(args): port = 8083 print("Listening on localhost:{}".format(port)) - server = HTTPServer(('', port), IngestFileRequestHandler) + server = HTTPServer(("", port), IngestFileRequestHandler) server.serve_forever() @@ -64,41 +66,52 @@ def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) subparsers = parser.add_subparsers() - sub_single = subparsers.add_parser('single', help="ingests a single base URL") + sub_single = subparsers.add_parser("single", help="ingests a single base URL") sub_single.set_defaults(func=run_single_ingest) - sub_single.add_argument('ingest_type', - default="pdf", - help="type of ingest (pdf, html, etc)") - sub_single.add_argument('--release-id', - help="(optional) existing release ident to match to") - sub_single.add_argument('--doi', help="(optional) existing release DOI to match to") - sub_single.add_argument('--force-recrawl', - action='store_true', - help="ignore GWB history and use SPNv2 to re-crawl") - sub_single.add_argument('--no-spn2', action='store_true', help="don't use live web (SPNv2)") - sub_single.add_argument('--html-quick-mode', - action='store_true', - help="don't fetch individual sub-resources, just use CDX") - sub_single.add_argument('url', help="URL of paper to fetch") + sub_single.add_argument( + "ingest_type", default="pdf", help="type of ingest (pdf, html, etc)" + ) + sub_single.add_argument( + "--release-id", help="(optional) existing release ident to match to" + ) + sub_single.add_argument("--doi", help="(optional) existing release DOI to match to") + sub_single.add_argument( + "--force-recrawl", + action="store_true", + help="ignore GWB history and use SPNv2 to re-crawl", + ) + sub_single.add_argument("--no-spn2", action="store_true", help="don't use live web (SPNv2)") + sub_single.add_argument( + "--html-quick-mode", + action="store_true", + help="don't fetch individual sub-resources, just use CDX", + ) + sub_single.add_argument("url", help="URL of paper to fetch") sub_requests = subparsers.add_parser( - 'requests', help="takes a series of ingest requests (JSON, per line) and runs each") - sub_requests.add_argument('--no-spn2', - action='store_true', - help="don't use live web (SPNv2)") - sub_requests.add_argument('--html-quick-mode', - action='store_true', - help="don't fetch individual sub-resources, just use CDX") + "requests", help="takes a series of ingest requests (JSON, per line) and runs each" + ) + sub_requests.add_argument( + "--no-spn2", action="store_true", help="don't use live web (SPNv2)" + ) + sub_requests.add_argument( + "--html-quick-mode", + action="store_true", + help="don't fetch individual sub-resources, just use CDX", + ) sub_requests.set_defaults(func=run_requests) - sub_requests.add_argument('json_file', - help="JSON file (request per line) to import from (or stdin)", - default=sys.stdin, - type=argparse.FileType('r')) + sub_requests.add_argument( + "json_file", + help="JSON file (request per line) to import from (or stdin)", + default=sys.stdin, + type=argparse.FileType("r"), + ) sub_api = subparsers.add_parser( - 'api', help="starts a simple HTTP server that processes ingest requests") + "api", help="starts a simple HTTP server that processes ingest requests" + ) sub_api.set_defaults(func=run_api) - sub_api.add_argument('--port', help="HTTP port to listen on", default=8033, type=int) + sub_api.add_argument("--port", help="HTTP port to listen on", default=8033, type=int) args = parser.parse_args() if not args.__dict__.get("func"): @@ -108,5 +121,5 @@ def main(): args.func(args) -if __name__ == '__main__': +if __name__ == "__main__": main() -- cgit v1.2.3