summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vinay Goel <vinay@archive.org> 2018-06-21 21:25:16 +0000
committer Vinay Goel <vinay@archive.org> 2018-06-21 21:25:16 +0000
commit 801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f (patch)
tree b8357e7531203ea5540e0c06df5406cf1fc79650
parent 5b376b98ca6fe8e9f75a42c807786a2aa9a52d94 (diff)
download fatcat-801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f.tar.gz
fatcat-801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f.zip
more importer fixes
-rwxr-xr-x python/client.py 18
-rw-r--r-- python/fatcat/importer_common.py 4
-rw-r--r-- python/fatcat/issn_importer.py 1
3 files changed, 14 insertions, 9 deletions
diff --git a/python/client.py b/python/client.py
index 4f6d3ccc..2804a210 100755
--- a/python/client.py
+++ b/python/client.py
@@ -6,13 +6,12 @@ from fatcat.raw_api_client import RawFatcatApiClient
from fatcat.crossref_importer import FatcatCrossrefImporter
from fatcat.orcid_importer import FatcatOrcidImporter
from fatcat.manifest_importer import FatcatManifestImporter
+from fatcat.issn_importer import FatcatIssnImporter
def run_import_crossref(args):
- fcc = FatcatCrossrefClient(args.host_url)
- fcc.import_crossref_file(
- args.json_file,
- issn_map_file=args.issn_map_file,
+ fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
create_containers=(not args.no_create_containers))
+ fci.process_batch(args.json_file, size=args.batch_size)
def run_import_orcid(args):
foi = FatcatOrcidImporter(args.host_url)
@@ -20,7 +19,7 @@ def run_import_orcid(args):
def run_import_issn(args):
fii = FatcatIssnImporter(args.host_url)
- fii.process_batch(args.csv_file, size=args.batch_size)
+ fii.process_csv_batch(args.csv_file, size=args.batch_size)
def run_import_manifest(args):
fmi = FatcatManifestImporter(args.host_url)
@@ -43,12 +42,17 @@ def main():
sub_import_crossref = subparsers.add_parser('import-crossref')
sub_import_crossref.set_defaults(func=run_import_crossref)
sub_import_crossref.add_argument('json_file',
- help="crossref JSON file to import from")
+ help="crossref JSON file to import from",
+ default=sys.stdin, type=argparse.FileType('r'))
sub_import_crossref.add_argument('issn_map_file',
- help="ISSN to ISSN-L mapping file")
+ help="ISSN to ISSN-L mapping file",
+ default=sys.stdin, type=argparse.FileType('r'))
sub_import_crossref.add_argument('--no-create-containers',
action='store_true',
help="skip creation of new container entities based on ISSN")
+ sub_import_crossref.add_argument('--batch-size',
+ help="size of batch to send",
+ default=50, type=int)
sub_import_orcid = subparsers.add_parser('import-orcid')
sub_import_orcid.set_defaults(func=run_import_orcid)
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index 9ec64e8f..6f867f70 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -55,7 +55,7 @@ class FatcatImporter:
"""Caches calls to the ISSN-L lookup API endpoint in a local dict"""
assert len(issnl) == 9 and issnl[4] == '-'
if issnl in self._issnl_id_map:
- return self._issnl_id_map[issn]
+ return self._issnl_id_map[issnl]
container_id = None
try:
rv = self.api.lookup_container(issnl=issnl)
@@ -98,6 +98,7 @@ class FatcatImporter:
return release_id
def read_issn_map_file(self, issn_map_file):
+ print("Loading ISSN map file...")
self._issn_issnl_map = dict()
for line in issn_map_file:
if line.startswith("ISSN") or len(line) == 0:
@@ -106,6 +107,7 @@ class FatcatImporter:
self._issn_issnl_map[issn] = issnl
# double mapping makes lookups easy
self._issn_issnl_map[issnl] = issnl
+ print("Got {} ISSN-L mappings.".format(len(self._issn_issnl_map)))
def issn2issnl(self, issn):
if issn is None:
diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py
index 9055bdc2..2bce25b4 100644
--- a/python/fatcat/issn_importer.py
+++ b/python/fatcat/issn_importer.py
@@ -32,7 +32,6 @@ class FatcatIssnImporter(FatcatImporter):
abbrev=None,
coden=None,
extra=extra)
- print(ce)
return ce
def create_row(self, row, editgroup_id=None):