summaryrefslogtreecommitdiffstats
path: root/python/fatcat/orcid_importer.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat/orcid_importer.py')
-rw-r--r--python/fatcat/orcid_importer.py63
1 files changed, 63 insertions, 0 deletions
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
new file mode 100644
index 00000000..063390b8
--- /dev/null
+++ b/python/fatcat/orcid_importer.py
@@ -0,0 +1,63 @@
+
+import sys
+import json
+import itertools
+import fatcat_client
+
+def value_or_none(e):
+ if type(e) == dict:
+ e = e.get('value')
+ if type(e) == str and len(e) == 0:
+ e = None
+ return e
+
+# from: https://docs.python.org/3/library/itertools.html
+def grouper(iterable, n, fillvalue=None):
+ "Collect data into fixed-length chunks or blocks"
+ args = [iter(iterable)] * n
+ return itertools.zip_longest(*args, fillvalue=fillvalue)
+
+class FatcatOrcidImporter:
+
+ def __init__(self, host_url):
+ conf = fatcat_client.Configuration()
+ conf.host = host_url
+ self.api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+
+ def parse_orcid_dict(self, obj):
+ """
+ obj is a python dict (parsed from json).
+ returns a CreatorEntity
+ """
+ name = obj['person']['name']
+ extra = None
+ given = value_or_none(name.get('given-name'))
+ sur = value_or_none(name.get('family-name'))
+ display = value_or_none(name.get('credit-name'))
+ if display is None:
+ # TODO: sorry human beings
+ display = "{} {}".format(given, sur)
+ ce = fatcat_client.CreatorEntity(
+ orcid=obj['orcid-identifier']['path'],
+ given_name=given,
+ surname=sur,
+ display_name=display,
+ extra=extra)
+ return ce
+
+ def process_line(self, line):
+ obj = json.loads(line)
+ ce = self.parse_orcid_dict(obj)
+ self.api.create_creator(ce)
+
+ def process_source(self, source):
+ for line in source:
+ self.process_line(line)
+
+ def process_batch(self, source, size=50):
+ """Reads and processes in batches (not API-call-per-line)"""
+ for lines in grouper(source, size):
+ objects = [self.parse_orcid_dict(json.loads(l))
+ for l in lines if l != None]
+ self.api.create_creator_batch(objects)
+ break