From 801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f Mon Sep 17 00:00:00 2001 From: Vinay Goel Date: Thu, 21 Jun 2018 21:25:16 +0000 Subject: more importer fixes --- python/client.py | 18 +++++++++++------- python/fatcat/importer_common.py | 4 +++- python/fatcat/issn_importer.py | 1 - 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/python/client.py b/python/client.py index 4f6d3ccc..2804a210 100755 --- a/python/client.py +++ b/python/client.py @@ -6,13 +6,12 @@ from fatcat.raw_api_client import RawFatcatApiClient from fatcat.crossref_importer import FatcatCrossrefImporter from fatcat.orcid_importer import FatcatOrcidImporter from fatcat.manifest_importer import FatcatManifestImporter +from fatcat.issn_importer import FatcatIssnImporter def run_import_crossref(args): - fcc = FatcatCrossrefClient(args.host_url) - fcc.import_crossref_file( - args.json_file, - issn_map_file=args.issn_map_file, + fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file, create_containers=(not args.no_create_containers)) + fci.process_batch(args.json_file, size=args.batch_size) def run_import_orcid(args): foi = FatcatOrcidImporter(args.host_url) @@ -20,7 +19,7 @@ def run_import_orcid(args): def run_import_issn(args): fii = FatcatIssnImporter(args.host_url) - fii.process_batch(args.csv_file, size=args.batch_size) + fii.process_csv_batch(args.csv_file, size=args.batch_size) def run_import_manifest(args): fmi = FatcatManifestImporter(args.host_url) @@ -43,12 +42,17 @@ def main(): sub_import_crossref = subparsers.add_parser('import-crossref') sub_import_crossref.set_defaults(func=run_import_crossref) sub_import_crossref.add_argument('json_file', - help="crossref JSON file to import from") + help="crossref JSON file to import from", + default=sys.stdin, type=argparse.FileType('r')) sub_import_crossref.add_argument('issn_map_file', - help="ISSN to ISSN-L mapping file") + help="ISSN to ISSN-L mapping file", + default=sys.stdin, type=argparse.FileType('r')) sub_import_crossref.add_argument('--no-create-containers', action='store_true', help="skip creation of new container entities based on ISSN") + sub_import_crossref.add_argument('--batch-size', + help="size of batch to send", + default=50, type=int) sub_import_orcid = subparsers.add_parser('import-orcid') sub_import_orcid.set_defaults(func=run_import_orcid) diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py index 9ec64e8f..6f867f70 100644 --- a/python/fatcat/importer_common.py +++ b/python/fatcat/importer_common.py @@ -55,7 +55,7 @@ class FatcatImporter: """Caches calls to the ISSN-L lookup API endpoint in a local dict""" assert len(issnl) == 9 and issnl[4] == '-' if issnl in self._issnl_id_map: - return self._issnl_id_map[issn] + return self._issnl_id_map[issnl] container_id = None try: rv = self.api.lookup_container(issnl=issnl) @@ -98,6 +98,7 @@ class FatcatImporter: return release_id def read_issn_map_file(self, issn_map_file): + print("Loading ISSN map file...") self._issn_issnl_map = dict() for line in issn_map_file: if line.startswith("ISSN") or len(line) == 0: @@ -106,6 +107,7 @@ class FatcatImporter: self._issn_issnl_map[issn] = issnl # double mapping makes lookups easy self._issn_issnl_map[issnl] = issnl + print("Got {} ISSN-L mappings.".format(len(self._issn_issnl_map))) def issn2issnl(self, issn): if issn is None: diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py index 9055bdc2..2bce25b4 100644 --- a/python/fatcat/issn_importer.py +++ b/python/fatcat/issn_importer.py @@ -32,7 +32,6 @@ class FatcatIssnImporter(FatcatImporter): abbrev=None, coden=None, extra=extra) - print(ce) return ce def create_row(self, row, editgroup_id=None): -- cgit v1.2.3