diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 19:15:25 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 19:15:25 -0700 |
commit | af1f65b980a4faf01004b617a7c34f80c380132b (patch) | |
tree | 6f51d52f57fde6e595b0a460c8d46f4eba063c82 /python/fatcat_import.py | |
parent | 02b3a659a951a6611eed59ad5078d5889119bedd (diff) | |
download | fatcat-af1f65b980a4faf01004b617a7c34f80c380132b.tar.gz fatcat-af1f65b980a4faf01004b617a7c34f80c380132b.zip |
arxiv importer robustification and CLI impl
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 94c90ea5..e80c5d5b 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -22,6 +22,15 @@ def run_jalc(args):
         extid_map_file=args.extid_map_file)
     Bs4XmlLinesPusher(ji, args.xml_file, "<rdf:Description").run()
 
+def run_arxiv(args):
+    ari = ArxivRawImporter(args.api,
+        edit_batch_size=args.batch_size)
+    if args.kafka_mode:
+        raise NotImplementedError
+        #KafkaBs4XmlPusher(ari, args.kafka_hosts, args.kafka_env, "api-arxiv", "fatcat-import").run()
+    else:
+        Bs4XmlFilePusher(ari, args.xml_file, "record").run()
+
 def run_orcid(args):
     foi = OrcidImporter(args.api,
         edit_batch_size=args.batch_size)
@@ -164,6 +173,18 @@ def main():
         help="DOI-to-other-identifiers sqlite3 database",
         default=None, type=str)
 
+    sub_arxiv = subparsers.add_parser('arxiv')
+    sub_arxiv.set_defaults(
+        func=run_arxiv,
+        auth_var="FATCAT_AUTH_WORKER_ARXIV",
+    )
+    sub_arxiv.add_argument('xml_file',
+        help="arXivRaw XML file to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_arxiv.add_argument('--kafka-mode',
+        action='store_true',
+        help="consume from kafka topic (not stdin)")
+
     sub_orcid = subparsers.add_parser('orcid')
     sub_orcid.set_defaults(
         func=run_orcid,