summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-06-21 17:02:24 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-06-21 17:02:24 -0700
commit 7b327b742458df72691c6cc9311af74091719ce1 (patch)
tree 3f92ffc66ba07470b25d78051af2a9c61c3833a5
parent 801abe6ed54ce8cd7d602d8d2ad4ced5b3502b9f (diff)
download fatcat-7b327b742458df72691c6cc9311af74091719ce1.tar.gz
fatcat-7b327b742458df72691c6cc9311af74091719ce1.zip
tweak CSV null fields as None, not ''
-rw-r--r-- python/fatcat/issn_importer.py 33
1 files changed, 22 insertions, 11 deletions
diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py
index 2bce25b4..69db8e66 100644
--- a/python/fatcat/issn_importer.py
+++ b/python/fatcat/issn_importer.py
@@ -8,6 +8,13 @@ from fatcat.importer_common import FatcatImporter
# CSV format (generated from git.archive.org/webgroup/oa-journal-analysis):
# ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count
+def or_none(s):
+ if s is None:
+ return None
+ if len(s) == 0:
+ return None
+ return s
+
class FatcatIssnImporter(FatcatImporter):
def parse_issn_row(self, row):
@@ -15,20 +22,24 @@ class FatcatIssnImporter(FatcatImporter):
row is a python dict (parsed from CSV).
returns a ContainerEntity
"""
+ title = or_none(row['title'])
+ issnl = or_none(row['ISSN-L'])
+ if title is None or issnl is None:
+ return
extra = dict(
- in_doaj=row['in_doaj'],
- in_road=row['in_road'],
- language=row['lang'],
- url=row['url'],
- ISSNp=row['ISSN-print'],
- ISSNe=row['ISSN-electronic'],
- is_oa=row['is_oa'],
- is_kept=row['is_kept'],
+ in_doaj=bool(row['in_doaj']),
+ in_road=bool(row['in_road']),
+ language=or_none(row['lang']),
+ url=or_none(row['url']),
+ ISSNp=or_none(row['ISSN-print']),
+ ISSNe=or_none(row['ISSN-electronic']),
+ is_oa=bool(row['is_oa']),
+ is_kept=bool(row['is_kept']),
)
ce = fatcat_client.ContainerEntity(
- issnl=row['ISSN-L'],
- name=row['title'],
- publisher=row['publisher'],
+ issnl=issnl,
+ name=title,
+ publisher=or_none(row['publisher']),
abbrev=None,
coden=None,
extra=extra)