diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/journal_metadata.py | 10 | 
2 files changed, 10 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index ebdce56f..a29b3019 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -236,8 +236,8 @@ class EntityImporter:          self._entity_queue.append(entity)          if len(self._entity_queue) >= self.edit_batch_size:              self.insert_batch(self._entity_queue) -            self.counts['insert'] += len(_entity_queue) -            self._entity_queue = 0 +            self.counts['insert'] += len(self._entity_queue) +            self._entity_queue = []      def want(self, raw_record):          """ diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index 7f6b1ee8..be62d63a 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -44,7 +44,7 @@ class JournalMetadataImporter(EntityImporter):              editgroup_extra=eg_extra)      def want(self, raw_record): -        if raw_record.get('issnl'): +        if raw_record.get('issnl') and raw_record.get('name'):              return True          return False @@ -55,6 +55,10 @@ class JournalMetadataImporter(EntityImporter):          returns a ContainerEntity (or None if invalid or couldn't parse)          """ +        if not row.get('name'): +            # Name is required (by schema) +            return None +          extra = dict()          for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev',              'coden', 'aliases', 'original_name', 'first_year', 'last_year', @@ -76,8 +80,10 @@ class JournalMetadataImporter(EntityImporter):          extra_ia = dict()          # TODO: would like an ia.longtail_ia flag          if row.get('sim'): +            # NB: None case of the .get() here is blech, but othrwise +            # extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on              extra_ia['sim'] = { -                'year_spans': row['sim']['year_spans'], +                'year_spans': row['sim'].get('year_spans'),              }          if extra_ia:              extra['ia'] = extra_ia  | 
