diff options
Diffstat (limited to 'python/fatcat_tools/importers/journal_metadata.py')
| -rw-r--r-- | python/fatcat_tools/importers/journal_metadata.py | 49 | 
1 files changed, 33 insertions, 16 deletions
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index cd058889..ff38cd77 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -3,7 +3,7 @@ import sys
 import json
 import itertools
 import fatcat_client
-from .common import FatcatImporter
+from .common import EntityImporter
 
 
 def or_none(s):
@@ -25,7 +25,7 @@ def truthy(s):
     else:
         return None
 
-class JournalMetadataImporter(FatcatImporter):
+class JournalMetadataImporter(EntityImporter):
     """
     Imports journal metadata ("containers") by ISSN, currently from a custom
     (data munged) .csv file format
@@ -45,7 +45,12 @@ class JournalMetadataImporter(FatcatImporter):
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra)
 
-    def parse_journal_metadata_row(self, row):
+    def want(self, raw_record):
+        if raw_record.get('ISSN-L'):
+            return True
+        return False
+
+    def parse_record(self, row):
         """
         row is a python dict (parsed from CSV).
         returns a ContainerEntity (or None if invalid or couldn't parse)
@@ -72,16 +77,28 @@ class JournalMetadataImporter(FatcatImporter):
             extra=extra)
         return ce
 
-    def create_row(self, row, editgroup_id=None):
-        ce = self.parse_journal_metadata_row(row)
-        if ce is not None:
-            self.api.create_container(ce, editgroup_id=editgroup_id)
-            self.counts['insert'] += 1
-
-    def create_batch(self, batch):
-        """Reads and processes in batches (not API-call-per-line)"""
-        objects = [self.parse_journal_metadata_row(l)
-                   for l in batch if (l is not None)]
-        objects = [o for o in objects if (o is not None)]
-        self.api.create_container_batch(objects, autoaccept=True)
-        self.counts['insert'] += len(objects)
+    def try_update(self, ce):
+
+        existing = None
+        try:
+            existing = self.api.lookup_container(issnl=ce.issnl)
+        except fatcat_client.rest.ApiException as err:
+            if err.status != 404:
+                raise err
+            # doesn't exist, need to update
+            return True
+
+        # eventually we'll want to support "updates", but for now just skip if
+        # entity already exists
+        if existing:
+            self.counts['exists'] += 1
+            return False
+        
+        return True
+
+    def insert_batch(self, batch):
+        self.api.create_container_batch(batch,
+            autoaccept=True,
+            description=self.editgroup_description,
+            extra=json.dumps(self.editgroup_extra))
+
