about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-09-03 13:48:54 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2019-09-03 13:48:54 -0700
commit: ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5 (patch)
tree: ec55a91fc3a4466b76298494076e8a3cbe907698
parent: 61a17a30e8592143af0b34f4d55a61f9b213ffc6 (diff)
download: chocula-ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5.tar.gz
          chocula-ad46c83e87512bc3e6c1fa992c135b7dc3db3ee5.zip
improve fatcat_export metadata quality
-rwxr-xr-x chocula.py 15
1 files changed, 12 insertions, 3 deletions
diff --git a/chocula.py b/chocula.py
index 47cd2ac..72268cc 100755
--- a/chocula.py
+++ b/chocula.py
@@ -1292,24 +1292,33 @@ class ChoculaDatabase():
for row in self.c.execute('SELECT * FROM journal WHERE valid_issnl = 1'):
counts['total'] += 1
+ name = row['name'].strip()
+
if not row['name']:
counts['empty-name'] += 1
continue
+ if len(name) <= 2:
+ counts['short-name'] += 1
+ continue
+
+ publisher = row['publisher'].strip() or None
+
out = dict(
issnl=row['issnl'],
wikidata_qid=row['wikidata_qid'],
ident=row['fatcat_ident'],
- publisher=row['publisher'],
- name=row['name'],
+ publisher=publisher,
+ name=name,
_known_issnl=row['known_issnl'])
extra = dict(
issnp=row['issnp'],
issne=row['issne'],
country=row['country'],
- lang=row['lang'],
)
+ if row['lang']:
+ extra['languages'] = [row['lang'],]
if row['sherpa_color']:
extra['sherpa_romeo'] = dict(color=row['sherpa_color'])