summaryrefslogtreecommitdiffstats
path: root/python/fatcat/importer_common.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat/importer_common.py')
-rw-r--r--python/fatcat/importer_common.py17
1 files changed, 14 insertions, 3 deletions
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index 9d495aa7..e084d8c4 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -1,4 +1,5 @@
+import re
import sys
import csv
import json
@@ -22,6 +23,7 @@ class FatcatImporter:
self._orcid_id_map = dict()
self._doi_id_map = dict()
self._issn_issnl_map = None
+ self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{4}$")
if issn_map_file:
self.read_issn_map_file(issn_map_file)
@@ -54,9 +56,11 @@ class FatcatImporter:
reader = csv.DictReader(source, delimiter=delimiter)
self.process_batch(reader, size)
+ def is_issnl(self, issnl):
+ return len(issnl) == 9 and issnl[4] == '-'
+
def lookup_issnl(self, issnl):
"""Caches calls to the ISSN-L lookup API endpoint in a local dict"""
- assert len(issnl) == 9 and issnl[4] == '-'
if issnl in self._issnl_id_map:
return self._issnl_id_map[issnl]
container_id = None
@@ -69,9 +73,13 @@ class FatcatImporter:
self._issnl_id_map[issnl] = container_id # might be None
return container_id
+ def is_orcid(self, orcid):
+ return self._orcid_regex.match(orcid) != None
+
def lookup_orcid(self, orcid):
"""Caches calls to the Orcid lookup API endpoint in a local dict"""
- assert len(orcid) == 19 and orcid[4] == '-'
+ if not self.is_orcid(orcid):
+ return None
if orcid in self._orcid_id_map:
return self._orcid_id_map[orcid]
creator_id = None
@@ -84,9 +92,12 @@ class FatcatImporter:
self._orcid_id_map[orcid] = creator_id # might be None
return creator_id
+ def is_doi(self, doi):
+ return doi.startswith("10.") and doi.count("/") >= 1
+
def lookup_doi(self, doi):
"""Caches calls to the doi lookup API endpoint in a local dict"""
- assert doi.startswith('10.')
+ assert self.is_doi(doi)
doi = doi.lower()
if doi in self._doi_id_map:
return self._doi_id_map[doi]