From ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 3 Sep 2019 13:48:54 -0700 Subject: improve fatcat_export metadata quality --- chocula.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/chocula.py b/chocula.py index 47cd2ac..72268cc 100755 --- a/chocula.py +++ b/chocula.py @@ -1292,24 +1292,33 @@ class ChoculaDatabase(): for row in self.c.execute('SELECT * FROM journal WHERE valid_issnl = 1'): counts['total'] += 1 + name = row['name'].strip() + if not row['name']: counts['empty-name'] += 1 continue + if len(name) <= 2: + counts['short-name'] += 1 + continue + + publisher = row['publisher'].strip() or None + out = dict( issnl=row['issnl'], wikidata_qid=row['wikidata_qid'], ident=row['fatcat_ident'], - publisher=row['publisher'], - name=row['name'], + publisher=publisher, + name=name, _known_issnl=row['known_issnl']) extra = dict( issnp=row['issnp'], issne=row['issne'], country=row['country'], - lang=row['lang'], ) + if row['lang']: + extra['languages'] = [row['lang'],] if row['sherpa_color']: extra['sherpa_romeo'] = dict(color=row['sherpa_color']) -- cgit v1.2.3