From f6f7450903bdbe36bd5fff146b942e34ad221557 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 25 Jan 2019 18:41:33 -0800 Subject: transform and import fixes/tweaks --- python/fatcat_tools/importers/common.py | 4 ++-- python/fatcat_tools/importers/journal_metadata.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'python/fatcat_tools/importers') diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index ebdce56f..a29b3019 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -236,8 +236,8 @@ class EntityImporter: self._entity_queue.append(entity) if len(self._entity_queue) >= self.edit_batch_size: self.insert_batch(self._entity_queue) - self.counts['insert'] += len(_entity_queue) - self._entity_queue = 0 + self.counts['insert'] += len(self._entity_queue) + self._entity_queue = [] def want(self, raw_record): """ diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index 7f6b1ee8..be62d63a 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -44,7 +44,7 @@ class JournalMetadataImporter(EntityImporter): editgroup_extra=eg_extra) def want(self, raw_record): - if raw_record.get('issnl'): + if raw_record.get('issnl') and raw_record.get('name'): return True return False @@ -55,6 +55,10 @@ class JournalMetadataImporter(EntityImporter): returns a ContainerEntity (or None if invalid or couldn't parse) """ + if not row.get('name'): + # Name is required (by schema) + return None + extra = dict() for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev', 'coden', 'aliases', 'original_name', 'first_year', 'last_year', @@ -76,8 +80,10 @@ class JournalMetadataImporter(EntityImporter): extra_ia = dict() # TODO: would like an ia.longtail_ia flag if row.get('sim'): + # NB: None case of the .get() here is blech, but 
otherwise + # extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on extra_ia['sim'] = { - 'year_spans': row['sim']['year_spans'], + 'year_spans': row['sim'].get('year_spans'), } if extra_ia: extra['ia'] = extra_ia -- cgit v1.2.3