about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-25 18:41:33 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-25 18:41:33 -0800
commit f6f7450903bdbe36bd5fff146b942e34ad221557 (patch)
tree c50332c832f414b5c0070e58a42ceb4751ed4d81 /python/fatcat_tools/importers
parent 16256f8ed119c072c09b13b0b1a6d4a56bed5113 (diff)
download fatcat-f6f7450903bdbe36bd5fff146b942e34ad221557.tar.gz
fatcat-f6f7450903bdbe36bd5fff146b942e34ad221557.zip
transform and import fixes/tweaks
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/common.py 4
-rw-r--r-- python/fatcat_tools/importers/journal_metadata.py 10
2 files changed, 10 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index ebdce56f..a29b3019 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -236,8 +236,8 @@ class EntityImporter:
self._entity_queue.append(entity)
if len(self._entity_queue) >= self.edit_batch_size:
self.insert_batch(self._entity_queue)
- self.counts['insert'] += len(_entity_queue)
- self._entity_queue = 0
+ self.counts['insert'] += len(self._entity_queue)
+ self._entity_queue = []
def want(self, raw_record):
"""
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 7f6b1ee8..be62d63a 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -44,7 +44,7 @@ class JournalMetadataImporter(EntityImporter):
editgroup_extra=eg_extra)
def want(self, raw_record):
- if raw_record.get('issnl'):
+ if raw_record.get('issnl') and raw_record.get('name'):
return True
return False
@@ -55,6 +55,10 @@ class JournalMetadataImporter(EntityImporter):
returns a ContainerEntity (or None if invalid or couldn't parse)
"""
+ if not row.get('name'):
+ # Name is required (by schema)
+ return None
+
extra = dict()
for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev',
'coden', 'aliases', 'original_name', 'first_year', 'last_year',
@@ -76,8 +80,10 @@ class JournalMetadataImporter(EntityImporter):
extra_ia = dict()
# TODO: would like an ia.longtail_ia flag
if row.get('sim'):
+ # NB: None case of the .get() here is blech, but otherwise
+ # extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on
extra_ia['sim'] = {
- 'year_spans': row['sim']['year_spans'],
+ 'year_spans': row['sim'].get('year_spans'),
}
if extra_ia:
extra['ia'] = extra_ia