diff options
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 8d82dab3..d7651792 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -166,6 +166,17 @@ def run_cdl_dash_dat(args):
     print("fileset id: {}".format(fs.ident))
     print("link: https://fatcat.wiki/fileset/{}".format(fs.ident))
 
+def run_datacite(args):
+    dci = DataciteImporter(args.api,
+        args.issn_map_file,
+        edit_batch_size=args.batch_size,
+        bezerk_mode=args.bezerk_mode)
+    if args.kafka_mode:
+        KafkaJsonPusher(dci, args.kafka_hosts, args.kafka_env, "api-datacite",
+            "fatcat-import", consume_batch_size=args.batch_size).run()
+    else:
+        JsonLinePusher(dci, args.json_file).run()
+
 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -439,6 +450,25 @@ def main():
         type=str,
         help="use existing editgroup (instead of creating a new one)")
 
+    sub_datacite = subparsers.add_parser('datacite',
+        help="import datacite.org metadata")
+    sub_datacite.add_argument('json_file',
+        help="File with jsonlines from datacite.org v2 API to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_datacite.add_argument('issn_map_file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_datacite.add_argument('--kafka-mode',
+        action='store_true',
+        help="consume from kafka topic (not stdin)")
+    sub_datacite.add_argument('--bezerk-mode',
+        action='store_true',
+        help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
+    sub_datacite.set_defaults(
+        func=run_datacite,
+        auth_var="FATCAT_API_AUTH_TOKEN",
+    )
+
     args = parser.parse_args()
     if not args.__dict__.get("func"):
         print("tell me what to do!")