diff options
author | Martin Czygan <martin@archive.org> | 2020-11-24 19:29:07 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2020-11-24 19:29:07 +0000 |
commit | cfd13852d7cb58fcc3387373960adaf3680f0faf (patch) | |
tree | 675954b8b34324fe22fc5a00f3fbb99a21a77a21 /python/fatcat_import.py | |
parent | fcfcd3224a113fa90da2045a3c7fe90127088ebe (diff) | |
parent | 1fca5a9822944d0646d2dcba6cf54f27a0ffe5c0 (diff) | |
download | fatcat-cfd13852d7cb58fcc3387373960adaf3680f0faf.tar.gz fatcat-cfd13852d7cb58fcc3387373960adaf3680f0faf.zip |
Merge branch 'bnewbold-doaj-metadata' into 'master'
DOAJ article metadata import
See merge request webgroup/fatcat!89
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 37 |
1 file changed, 37 insertions, 0 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 19cf43ec..ff6c94dc 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -256,6 +256,24 @@ def run_datacite(args):
     else:
         JsonLinePusher(dci, args.json_file).run()
 
+def run_doaj_article(args):
+    dai = DoajArticleImporter(args.api,
+        args.issn_map_file,
+        edit_batch_size=args.batch_size,
+        do_updates=args.do_updates,
+    )
+    if args.kafka_mode:
+        KafkaJsonPusher(
+            dai,
+            args.kafka_hosts,
+            args.kafka_env,
+            "api-doaj",
+            "fatcat-{}-import-doaj".format(args.kafka_env),
+            consume_batch_size=args.batch_size,
+        ).run()
+    else:
+        JsonLinePusher(dai, args.json_file).run()
+
 def run_file_meta(args):
     # do_updates defaults to true for this importer
     fmi = FileMetaImporter(args.api,
@@ -606,6 +624,25 @@ def main():
         auth_var="FATCAT_AUTH_WORKER_DATACITE",
     )
 
+    sub_doaj_article = subparsers.add_parser('doaj-article',
+        help="import doaj.org article metadata")
+    sub_doaj_article.add_argument('json_file',
+        help="File with JSON lines from DOAJ API (or bulk dump) to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_doaj_article.add_argument('--issn-map-file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_doaj_article.add_argument('--kafka-mode',
+        action='store_true',
+        help="consume from kafka topic (not stdin)")
+    sub_doaj_article.add_argument('--do-updates',
+        action='store_true',
+        help="update any pre-existing release entities")
+    sub_doaj_article.set_defaults(
+        func=run_doaj_article,
+        auth_var="FATCAT_AUTH_WORKER_DOAJ",
+    )
+
     sub_file_meta = subparsers.add_parser('file-meta',
         help="simple update-only importer for file metadata")
     sub_file_meta.set_defaults(
```