From 7d1ac51fb1b67f64f03e4c6e943202085cd4faa9 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 2 Dec 2020 11:29:03 -0800
Subject: initial implementation of dblp release importer (in progress)

---
 python/fatcat_import.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'python/fatcat_import.py')

diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 6c9e65a8..5ee81b92 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -273,6 +273,19 @@ def run_doaj_article(args):
     else:
         JsonLinePusher(dai, args.json_file).run()
 
+def run_dblp_release(args):
+    dwi = DblpReleaseImporter(args.api,
+        args.issn_map_file,
+        edit_batch_size=args.batch_size,
+        do_updates=args.do_updates,
+    )
+    Bs4XmlLargeFilePusher(
+        dwi,
+        args.xml_file,
+        DblpReleaseImporter.ELEMENT_TYPES,
+        use_lxml=True,
+    ).run()
+
 def run_file_meta(args):
     # do_updates defaults to true for this importer
     fmi = FileMetaImporter(args.api,
@@ -642,6 +655,22 @@ def main():
         auth_var="FATCAT_AUTH_WORKER_DOAJ",
     )
 
+    sub_dblp_release = subparsers.add_parser('dblp-release',
+        help="import dblp release metadata")
+    sub_dblp_release.add_argument('xml_file',
+        help="File with DBLP XML to import from",
+        default=sys.stdin, type=argparse.FileType('rb'))
+    sub_dblp_release.add_argument('--issn-map-file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_dblp_release.add_argument('--do-updates',
+        action='store_true',
+        help="update any pre-existing release entities")
+    sub_dblp_release.set_defaults(
+        func=run_dblp_release,
+        auth_var="FATCAT_AUTH_WORKER_DBLP",
+    )
+
     sub_file_meta = subparsers.add_parser('file-meta',
         help="simple update-only importer for file metadata")
     sub_file_meta.set_defaults(
-- 
cgit v1.2.3