diff options
| author | Vinay Goel <vinay@archive.org> | 2018-06-21 21:25:16 +0000 | 
|---|---|---|
| committer | Vinay Goel <vinay@archive.org> | 2018-06-21 21:25:16 +0000 | 
| commit | 801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f (patch) | |
| tree | b8357e7531203ea5540e0c06df5406cf1fc79650 /python | |
| parent | 5b376b98ca6fe8e9f75a42c807786a2aa9a52d94 (diff) | |
| download | fatcat-801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f.tar.gz fatcat-801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f.zip | |
more importer fixes
Diffstat (limited to 'python')
| -rwxr-xr-x | python/client.py | 18 | ||||
| -rw-r--r-- | python/fatcat/importer_common.py | 4 | ||||
| -rw-r--r-- | python/fatcat/issn_importer.py | 1 | 
3 files changed, 14 insertions, 9 deletions
```diff
diff --git a/python/client.py b/python/client.py
index 4f6d3ccc..2804a210 100755
--- a/python/client.py
+++ b/python/client.py
@@ -6,13 +6,12 @@ from fatcat.raw_api_client import RawFatcatApiClient
 from fatcat.crossref_importer import FatcatCrossrefImporter
 from fatcat.orcid_importer import FatcatOrcidImporter
 from fatcat.manifest_importer import FatcatManifestImporter
+from fatcat.issn_importer import FatcatIssnImporter
 
 def run_import_crossref(args):
-    fcc = FatcatCrossrefClient(args.host_url)
-    fcc.import_crossref_file(
-        args.json_file,
-        issn_map_file=args.issn_map_file,
+    fci = FatcatCrossrefImporter(args.host_url, args.issn_map_file,
         create_containers=(not args.no_create_containers))
+    fci.process_batch(args.json_file, size=args.batch_size)
 
 def run_import_orcid(args):
     foi = FatcatOrcidImporter(args.host_url)
@@ -20,7 +19,7 @@ def run_import_orcid(args):
 
 def run_import_issn(args):
     fii = FatcatIssnImporter(args.host_url)
-    fii.process_batch(args.csv_file, size=args.batch_size)
+    fii.process_csv_batch(args.csv_file, size=args.batch_size)
 
 def run_import_manifest(args):
     fmi = FatcatManifestImporter(args.host_url)
@@ -43,12 +42,17 @@ def main():
     sub_import_crossref = subparsers.add_parser('import-crossref')
     sub_import_crossref.set_defaults(func=run_import_crossref)
     sub_import_crossref.add_argument('json_file',
-        help="crossref JSON file to import from")
+        help="crossref JSON file to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
     sub_import_crossref.add_argument('issn_map_file',
-        help="ISSN to ISSN-L mapping file")
+        help="ISSN to ISSN-L mapping file",
+        default=sys.stdin, type=argparse.FileType('r'))
     sub_import_crossref.add_argument('--no-create-containers',
         action='store_true',
         help="skip creation of new container entities based on ISSN")
+    sub_import_crossref.add_argument('--batch-size',
+        help="size of batch to send",
+        default=50, type=int)
 
     sub_import_orcid = subparsers.add_parser('import-orcid')
     sub_import_orcid.set_defaults(func=run_import_orcid)
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index 9ec64e8f..6f867f70 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -55,7 +55,7 @@ class FatcatImporter:
         """Caches calls to the ISSN-L lookup API endpoint in a local dict"""
         assert len(issnl) == 9 and issnl[4] == '-'
         if issnl in self._issnl_id_map:
-            return self._issnl_id_map[issn]
+            return self._issnl_id_map[issnl]
         container_id = None
         try:
             rv = self.api.lookup_container(issnl=issnl)
@@ -98,6 +98,7 @@ class FatcatImporter:
         return release_id
 
     def read_issn_map_file(self, issn_map_file):
+        print("Loading ISSN map file...")
         self._issn_issnl_map = dict()
         for line in issn_map_file:
             if line.startswith("ISSN") or len(line) == 0:
@@ -106,6 +107,7 @@ class FatcatImporter:
             self._issn_issnl_map[issn] = issnl
             # double mapping makes lookups easy
             self._issn_issnl_map[issnl] = issnl
+        print("Got {} ISSN-L mappings.".format(len(self._issn_issnl_map)))
 
     def issn2issnl(self, issn):
         if issn is None:
diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py
index 9055bdc2..2bce25b4 100644
--- a/python/fatcat/issn_importer.py
+++ b/python/fatcat/issn_importer.py
@@ -32,7 +32,6 @@ class FatcatIssnImporter(FatcatImporter):
             abbrev=None,
             coden=None,
             extra=extra)
-        print(ce)
         return ce
 
     def create_row(self, row, editgroup_id=None):
```
