about summary refs log tree commit diff stats
path: root/python/fatcat_import.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 11:13:45 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-05-22 11:33:22 -0700
commit 4a3112f9f8de73511f354e7f1ceff3f8e2b7036d (patch)
tree ec53dd35b031c69d7ac88a07e325bfab7cb1c688 /python/fatcat_import.py
parent c0faa77cce85ec8ade96927c9ce2ff5dd166aff6 (diff)
download fatcat-4a3112f9f8de73511f354e7f1ceff3f8e2b7036d.tar.gz
fatcat-4a3112f9f8de73511f354e7f1ceff3f8e2b7036d.zip
pubmed importer command and tweaks
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x python/fatcat_import.py 25
1 files changed, 25 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index e80c5d5b..91fa2279 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -31,6 +31,16 @@ def run_arxiv(args):
else:
Bs4XmlFilePusher(ari, args.xml_file, "record").run()
+def run_pubmed(args):
+ pi = PubmedImporter(args.api,
+ args.issn_map_file,
+ edit_batch_size=args.batch_size)
+ if args.kafka_mode:
+ raise NotImplementedError
+ #KafkaBs4XmlPusher(pi, args.kafka_hosts, args.kafka_env, "api-pubmed", "fatcat-import").run()
+ else:
+ Bs4XmlFilePusher(pi, args.xml_file, "PubmedArticle").run()
+
def run_orcid(args):
foi = OrcidImporter(args.api,
edit_batch_size=args.batch_size)
@@ -185,6 +195,21 @@ def main():
action='store_true',
help="consume from kafka topic (not stdin)")
+ sub_pubmed = subparsers.add_parser('pubmed')
+ sub_pubmed.set_defaults(
+ func=run_pubmed,
+ auth_var="FATCAT_AUTH_WORKER_PUBMED",
+ )
+ sub_pubmed.add_argument('xml_file',
+ help="Pubmed XML file to import from",
+ default=sys.stdin, type=argparse.FileType('r'))
+ sub_pubmed.add_argument('issn_map_file',
+ help="ISSN to ISSN-L mapping file",
+ default=None, type=argparse.FileType('r'))
+ sub_pubmed.add_argument('--kafka-mode',
+ action='store_true',
+ help="consume from kafka topic (not stdin)")
+
sub_orcid = subparsers.add_parser('orcid')
sub_orcid.set_defaults(
func=run_orcid,