diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 17:19:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 17:19:58 -0700 |
commit | 90560ba18ac042a23db6995cc07a5cef024bc179 (patch) | |
tree | 64f38160b6b6efe26b58f33bb8d50f156119e7d6 /python/fatcat_import.py | |
parent | c9c830256315066afdc619eeaba5b234de89468e (diff) | |
download | fatcat-90560ba18ac042a23db6995cc07a5cef024bc179.tar.gz fatcat-90560ba18ac042a23db6995cc07a5cef024bc179.zip |
JALC bulk file importer
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 6b1a10b1..94c90ea5 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -16,6 +16,12 @@ def run_crossref(args):
     else:
         JsonLinePusher(fci, args.json_file).run()
 
+def run_jalc(args):
+    ji = JalcImporter(args.api,
+        args.issn_map_file,
+        extid_map_file=args.extid_map_file)
+    Bs4XmlLinesPusher(ji, args.xml_file, "<rdf:Description").run()
+
 def run_orcid(args):
     foi = OrcidImporter(args.api,
         edit_batch_size=args.batch_size)
@@ -143,6 +149,21 @@ def main():
         action='store_true',
         help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
 
+    sub_jalc = subparsers.add_parser('jalc')
+    sub_jalc.set_defaults(
+        func=run_jalc,
+        auth_var="FATCAT_AUTH_WORKER_JALC",
+    )
+    sub_jalc.add_argument('xml_file',
+        help="Jalc RDF XML file (record-per-line) to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_jalc.add_argument('issn_map_file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_jalc.add_argument('--extid-map-file',
+        help="DOI-to-other-identifiers sqlite3 database",
+        default=None, type=str)
+
     sub_orcid = subparsers.add_parser('orcid')
     sub_orcid.set_defaults(
         func=run_orcid,