diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-03 13:48:54 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-03 13:48:54 -0700 |
commit | ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5 (patch) | |
tree | ec55a91fc3a4466b76298494076e8a3cbe907698 | |
parent | 61a17a30e8592143af0b34f4d55a61f9b213ffc6 (diff) | |
download | chocula-ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5.tar.gz chocula-ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5.zip |
improve fatcat_export metadata quality
-rwxr-xr-x | chocula.py | 15 |
1 file changed, 12 insertions, 3 deletions
@@ -1292,24 +1292,33 @@ class ChoculaDatabase(): for row in self.c.execute('SELECT * FROM journal WHERE valid_issnl = 1'): counts['total'] += 1 + name = row['name'].strip() + if not row['name']: counts['empty-name'] += 1 continue + if len(name) <= 2: + counts['short-name'] += 1 + continue + + publisher = row['publisher'].strip() or None + out = dict( issnl=row['issnl'], wikidata_qid=row['wikidata_qid'], ident=row['fatcat_ident'], - publisher=row['publisher'], - name=row['name'], + publisher=publisher, + name=name, _known_issnl=row['known_issnl']) extra = dict( issnp=row['issnp'], issne=row['issne'], country=row['country'], - lang=row['lang'], ) + if row['lang']: + extra['languages'] = [row['lang'],] if row['sherpa_color']: extra['sherpa_romeo'] = dict(color=row['sherpa_color']) |