aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-08-15 21:47:00 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-08-15 21:47:00 -0700
commit: 4c11f65f202ef8f71bfd640232ed30ccd6f4c3a4 (patch)
tree: cfa76395e59cbcf0c05f5eb79af2e2013f231080 /python/fatcat
parent: 18821fcbfa9eb38ee0fb0b065d6642b461fed021 (diff)
download: fatcat-4c11f65f202ef8f71bfd640232ed30ccd6f4c3a4.tar.gz
fatcat-4c11f65f202ef8f71bfd640232ed30ccd6f4c3a4.zip
improve handling of invalid identifiers
Diffstat (limited to 'python/fatcat')
-rw-r--r-- python/fatcat/importer_common.py | 17
-rw-r--r-- python/fatcat/orcid_importer.py | 7
2 files changed, 19 insertions, 5 deletions
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index 9d495aa7..e084d8c4 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -1,4 +1,5 @@
+import re
import sys
import csv
import json
@@ -22,6 +23,7 @@ class FatcatImporter:
self._orcid_id_map = dict()
self._doi_id_map = dict()
self._issn_issnl_map = None
+ self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{4}$")
if issn_map_file:
self.read_issn_map_file(issn_map_file)
@@ -54,9 +56,11 @@ class FatcatImporter:
reader = csv.DictReader(source, delimiter=delimiter)
self.process_batch(reader, size)
+ def is_issnl(self, issnl):
+ return len(issnl) == 9 and issnl[4] == '-'
+
def lookup_issnl(self, issnl):
"""Caches calls to the ISSN-L lookup API endpoint in a local dict"""
- assert len(issnl) == 9 and issnl[4] == '-'
if issnl in self._issnl_id_map:
return self._issnl_id_map[issnl]
container_id = None
@@ -69,9 +73,13 @@ class FatcatImporter:
self._issnl_id_map[issnl] = container_id # might be None
return container_id
+ def is_orcid(self, orcid):
+ return self._orcid_regex.match(orcid) != None
+
def lookup_orcid(self, orcid):
"""Caches calls to the Orcid lookup API endpoint in a local dict"""
- assert len(orcid) == 19 and orcid[4] == '-'
+ if not self.is_orcid(orcid):
+ return None
if orcid in self._orcid_id_map:
return self._orcid_id_map[orcid]
creator_id = None
@@ -84,9 +92,12 @@ class FatcatImporter:
self._orcid_id_map[orcid] = creator_id # might be None
return creator_id
+ def is_doi(self, doi):
+ return doi.startswith("10.") and doi.count("/") >= 1
+
def lookup_doi(self, doi):
"""Caches calls to the doi lookup API endpoint in a local dict"""
- assert doi.startswith('10.')
+ assert self.is_doi(doi)
doi = doi.lower()
if doi in self._doi_id_map:
return self._doi_id_map[doi]
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 69b184d5..e57703d5 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -5,7 +5,6 @@ import itertools
import fatcat_client
from fatcat.importer_common import FatcatImporter
-
def value_or_none(e):
if type(e) == dict:
e = e.get('value')
@@ -46,8 +45,12 @@ class FatcatOrcidImporter(FatcatImporter):
else:
# must have *some* name
return None
+ orcid = obj['orcid-identifier']['path']
+ if not self.is_orcid(orcid):
+ sys.stderr.write("Bad ORCID: {}\n".format(orcid))
+ return None
ce = fatcat_client.CreatorEntity(
- orcid=obj['orcid-identifier']['path'],
+ orcid=orcid,
given_name=given,
surname=sur,
display_name=display,