diff options
Diffstat (limited to 'python/fatcat/importer_common.py')
-rw-r--r-- | python/fatcat/importer_common.py | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py index 9d495aa7..e084d8c4 100644 --- a/python/fatcat/importer_common.py +++ b/python/fatcat/importer_common.py @@ -1,4 +1,5 @@ +import re import sys import csv import json @@ -22,6 +23,7 @@ class FatcatImporter: self._orcid_id_map = dict() self._doi_id_map = dict() self._issn_issnl_map = None + self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{4}$") if issn_map_file: self.read_issn_map_file(issn_map_file) @@ -54,9 +56,11 @@ class FatcatImporter: reader = csv.DictReader(source, delimiter=delimiter) self.process_batch(reader, size) + def is_issnl(self, issnl): + return len(issnl) == 9 and issnl[4] == '-' + def lookup_issnl(self, issnl): """Caches calls to the ISSN-L lookup API endpoint in a local dict""" - assert len(issnl) == 9 and issnl[4] == '-' if issnl in self._issnl_id_map: return self._issnl_id_map[issnl] container_id = None @@ -69,9 +73,13 @@ class FatcatImporter: self._issnl_id_map[issnl] = container_id # might be None return container_id + def is_orcid(self, orcid): + return self._orcid_regex.match(orcid) != None + def lookup_orcid(self, orcid): """Caches calls to the Orcid lookup API endpoint in a local dict""" - assert len(orcid) == 19 and orcid[4] == '-' + if not self.is_orcid(orcid): + return None if orcid in self._orcid_id_map: return self._orcid_id_map[orcid] creator_id = None @@ -84,9 +92,12 @@ class FatcatImporter: self._orcid_id_map[orcid] = creator_id # might be None return creator_id + def is_doi(self, doi): + return doi.startswith("10.") and doi.count("/") >= 1 + def lookup_doi(self, doi): """Caches calls to the doi lookup API endpoint in a local dict""" - assert doi.startswith('10.') + assert self.is_doi(doi) doi = doi.lower() if doi in self._doi_id_map: return self._doi_id_map[doi] |