summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/orcid.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/orcid.py')
-rw-r--r--python/fatcat_tools/importers/orcid.py73
1 files changed, 73 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
new file mode 100644
index 00000000..350c4c57
--- /dev/null
+++ b/python/fatcat_tools/importers/orcid.py
@@ -0,0 +1,73 @@
+
+import sys
+import json
+import itertools
+import fatcat_client
+from fatcat_tools.importers.common import FatcatImporter
+
+def value_or_none(e):
+ if type(e) == dict:
+ e = e.get('value')
+ if type(e) == str and len(e) == 0:
+ e = None
+ # TODO: this is probably bogus; patched in desperation; remove?
+ if e:
+ try:
+ e.encode()
+ except UnicodeEncodeError:
+ # Invalid JSON?
+ print("BAD UNICODE")
+ return None
+ return e
+
+class FatcatOrcidImporter(FatcatImporter):
+
+ def parse_orcid_dict(self, obj):
+ """
+ obj is a python dict (parsed from json).
+ returns a CreatorEntity
+ """
+ name = obj['person']['name']
+ if name is None:
+ return None
+ extra = None
+ given = value_or_none(name.get('given-names'))
+ sur = value_or_none(name.get('family-name'))
+ display = value_or_none(name.get('credit-name'))
+ if display is None:
+ # TODO: sorry human beings
+ if given and sur:
+ display = "{} {}".format(given, sur)
+ elif sur:
+ display = sur
+ elif given:
+ display = given
+ else:
+ # must have *some* name
+ return None
+ orcid = obj['orcid-identifier']['path']
+ if not self.is_orcid(orcid):
+ sys.stderr.write("Bad ORCID: {}\n".format(orcid))
+ return None
+ ce = fatcat_client.CreatorEntity(
+ orcid=orcid,
+ given_name=given,
+ surname=sur,
+ display_name=display,
+ extra=extra)
+ return ce
+
+ def create_row(self, row, editgroup=None):
+ obj = json.loads(row)
+ ce = self.parse_orcid_dict(obj)
+ if ce is not None:
+ self.api.create_creator(ce, editgroup=editgroup)
+ self.insert_count = self.insert_count + 1
+
+ def create_batch(self, batch, editgroup=None):
+ """Reads and processes in batches (not API-call-per-line)"""
+ objects = [self.parse_orcid_dict(json.loads(l))
+ for l in batch if l != None]
+ objects = [o for o in objects if o != None]
+ self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
+ self.insert_count = self.insert_count + len(objects)