about summary refs log tree commit diff stats
path: root/python/fatcat_import.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-12-17 01:55:30 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-12-17 23:03:08 -0800
commit 58ff361eb481bee9d2ef7249f48f94729d2a830d (patch)
tree 9cbcbcf1c7eafa958c81d18fcf5b6a5d27f2f42d /python/fatcat_import.py
parent b2f53077bd05a536b5fdb551755a559b653421d3 (diff)
download fatcat-58ff361eb481bee9d2ef7249f48f94729d2a830d.tar.gz
fatcat-58ff361eb481bee9d2ef7249f48f94729d2a830d.zip
very simple dblp container importer
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x python/fatcat_import.py 36
1 files changed, 34 insertions, 2 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 90b53e5c..1dcfec21 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -274,19 +274,29 @@ def run_doaj_article(args):
JsonLinePusher(dai, args.json_file).run()
def run_dblp_release(args):
- dwi = DblpReleaseImporter(args.api,
+ dri = DblpReleaseImporter(args.api,
dblp_container_map_file=args.dblp_container_map_file,
edit_batch_size=args.batch_size,
do_updates=args.do_updates,
dump_json_mode=args.dump_json_mode,
)
Bs4XmlLargeFilePusher(
- dwi,
+ dri,
args.xml_file,
DblpReleaseImporter.ELEMENT_TYPES,
use_lxml=True,
).run()
+def run_dblp_container(args):
+ dci = DblpContainerImporter(args.api,
+ args.issn_map_file,
+ dblp_container_map_file=args.dblp_container_map_file,
+ dblp_container_map_output=args.dblp_container_map_output,
+ edit_batch_size=args.batch_size,
+ do_updates=args.do_updates,
+ )
+ JsonLinePusher(dci, args.json_file).run()
+
def run_file_meta(args):
# do_updates defaults to true for this importer
fmi = FileMetaImporter(args.api,
@@ -675,6 +685,28 @@ def main():
auth_var="FATCAT_AUTH_WORKER_DBLP",
)
+ sub_dblp_container = subparsers.add_parser('dblp-container',
+ help="import dblp container metadata")
+ sub_dblp_container.add_argument('json_file',
+ help="File with DBLP container JSON to import from (see extra/dblp/)",
+ default=sys.stdin, type=argparse.FileType('rb'))
+ sub_dblp_container.add_argument('--dblp-container-map-file',
+ help="file path to dblp pre-existing prefix to container_id TSV file",
+ default=None, type=argparse.FileType('r'))
+ sub_dblp_container.add_argument('--dblp-container-map-output',
+ help="file path to output new dblp container map TSV to",
+ default=None, type=argparse.FileType('w'))
+ sub_dblp_container.add_argument('--issn-map-file',
+ help="ISSN to ISSN-L mapping file",
+ default=None, type=argparse.FileType('r'))
+ sub_dblp_container.add_argument('--do-updates',
+ action='store_true',
+ help="update any pre-existing container entities")
+ sub_dblp_container.set_defaults(
+ func=run_dblp_container,
+ auth_var="FATCAT_AUTH_WORKER_DBLP",
+ )
+
sub_file_meta = subparsers.add_parser('file-meta',
help="simple update-only importer for file metadata")
sub_file_meta.set_defaults(