diff options
Diffstat (limited to 'python/fatcat/orcid_importer.py')
-rw-r--r-- | python/fatcat/orcid_importer.py | 49 |
1 files changed, 12 insertions, 37 deletions
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index ba8d0bd7..fb4716df 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -3,6 +3,8 @@ import sys
 import json
 import itertools
 import fatcat_client
+from fatcat.importer_common import FatcatImporter
+
 
 def value_or_none(e):
     if type(e) == dict:
@@ -11,18 +13,7 @@ def value_or_none(e):
         e = None
     return e
 
-# from: https://docs.python.org/3/library/itertools.html
-def grouper(iterable, n, fillvalue=None):
-    "Collect data into fixed-length chunks or blocks"
-    args = [iter(iterable)] * n
-    return itertools.zip_longest(*args, fillvalue=fillvalue)
-
-class FatcatOrcidImporter:
-
-    def __init__(self, host_url):
-        conf = fatcat_client.Configuration()
-        conf.host = host_url
-        self.api = fatcat_client.DefaultApi(fatcat_client.ApiClient(conf))
+class FatcatOrcidImporter(FatcatImporter):
 
     def parse_orcid_dict(self, obj):
         """
@@ -47,34 +38,18 @@ class FatcatOrcidImporter:
             extra=extra)
         return ce
 
-    def process_line(self, line, editgroup_id=None):
-        """Doesn't accept the editgroup"""
-        obj = json.loads(line)
+    def create_row(self, row, editgroup_id=None):
+        obj = json.loads(row)
         ce = self.parse_orcid_dict(obj)
         if ce is not None:
             ce.editgroup_id = editgroup_id
             self.api.create_creator(ce)
 
-    def process_source(self, source, group_size=100):
-        """Creates and auto-accepts editgropu every group_size lines"""
-        eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
-        for i, line in enumerate(source):
-            self.process_line(line, editgroup_id=eg.id)
-            if i > 0 and (i % group_size) == 0:
-                self.api.accept_editgroup(eg)
-                eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
-        if i == 0 or (i % group_size) != 0:
-            self.api.accept_editgroup(eg.id)
-
-    def process_batch(self, source, size=50):
+    def create_batch(self, batch, editgroup_id=None):
         """Reads and processes in batches (not API-call-per-line)"""
-        for lines in grouper(source, size):
-            objects = [self.parse_orcid_dict(json.loads(l))
-                       for l in lines if l != None]
-            objects = [o for o in objects if o != None]
-            eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
-            for o in objects:
-                o.editgroup_id = eg.id
-            self.api.create_creator_batch(objects)
-            self.api.accept_editgroup(eg.id)
-            print("inserted {}".format(len(objects)))
+        objects = [self.parse_orcid_dict(json.loads(l))
+                   for l in batch if l != None]
+        objects = [o for o in objects if o != None]
+        for o in objects:
+            o.editgroup_id = editgroup_id
+        self.api.create_creator_batch(objects)