diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 11:13:45 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-22 11:33:22 -0700 |
commit | 4a3112f9f8de73511f354e7f1ceff3f8e2b7036d (patch) | |
tree | ec53dd35b031c69d7ac88a07e325bfab7cb1c688 /python/fatcat_import.py | |
parent | c0faa77cce85ec8ade96927c9ce2ff5dd166aff6 (diff) | |
download | fatcat-4a3112f9f8de73511f354e7f1ceff3f8e2b7036d.tar.gz fatcat-4a3112f9f8de73511f354e7f1ceff3f8e2b7036d.zip |
pubmed importer command and tweaks
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 25 |
1 files changed, 25 insertions, 0 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index e80c5d5b..91fa2279 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -31,6 +31,16 @@ def run_arxiv(args):
     else:
         Bs4XmlFilePusher(ari, args.xml_file, "record").run()
 
+def run_pubmed(args):
+    pi = PubmedImporter(args.api,
+        args.issn_map_file,
+        edit_batch_size=args.batch_size)
+    if args.kafka_mode:
+        raise NotImplementedError
+        #KafkaBs4XmlPusher(pi, args.kafka_hosts, args.kafka_env, "api-pubmed", "fatcat-import").run()
+    else:
+        Bs4XmlFilePusher(pi, args.xml_file, "PubmedArticle").run()
+
 def run_orcid(args):
     foi = OrcidImporter(args.api,
         edit_batch_size=args.batch_size)
@@ -185,6 +195,21 @@ def main():
         action='store_true',
         help="consume from kafka topic (not stdin)")
 
+    sub_pubmed = subparsers.add_parser('pubmed')
+    sub_pubmed.set_defaults(
+        func=run_pubmed,
+        auth_var="FATCAT_AUTH_WORKER_PUBMED",
+    )
+    sub_pubmed.add_argument('xml_file',
+        help="Pubmed XML file to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_pubmed.add_argument('issn_map_file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_pubmed.add_argument('--kafka-mode',
+        action='store_true',
+        help="consume from kafka topic (not stdin)")
+
     sub_orcid = subparsers.add_parser('orcid')
     sub_orcid.set_defaults(
         func=run_orcid,
```