diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-06-11 23:44:20 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-06-11 23:46:08 -0700 |
commit | a3c92de2067064d74a9997bf2a836642bd77bac3 (patch) | |
tree | 23a1405555573c7ed4f6f8bda4ab3a39cb0f3e20 /python | |
parent | 38b66b335b7e7f57e90553c62eb2a1de852a932a (diff) | |
download | fatcat-a3c92de2067064d74a9997bf2a836642bd77bac3.tar.gz fatcat-a3c92de2067064d74a9997bf2a836642bd77bac3.zip |
actually accept editgroups for ORCID import
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat/orcid_importer.py | 21 |
1 files changed, 17 insertions, 4 deletions
```diff
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 02681b0a..ba8d0bd7 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -47,15 +47,24 @@ class FatcatOrcidImporter:
             extra=extra)
         return ce
 
-    def process_line(self, line):
+    def process_line(self, line, editgroup_id=None):
+        """Doesn't accept the editgroup"""
         obj = json.loads(line)
         ce = self.parse_orcid_dict(obj)
         if ce is not None:
+            ce.editgroup_id = editgroup_id
             self.api.create_creator(ce)
 
-    def process_source(self, source):
-        for line in source:
-            self.process_line(line)
+    def process_source(self, source, group_size=100):
+        """Creates and auto-accepts editgropu every group_size lines"""
+        eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+        for i, line in enumerate(source):
+            self.process_line(line, editgroup_id=eg.id)
+            if i > 0 and (i % group_size) == 0:
+                self.api.accept_editgroup(eg)
+                eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+        if i == 0 or (i % group_size) != 0:
+            self.api.accept_editgroup(eg.id)
 
     def process_batch(self, source, size=50):
         """Reads and processes in batches (not API-call-per-line)"""
@@ -63,5 +72,9 @@ class FatcatOrcidImporter:
             objects = [self.parse_orcid_dict(json.loads(l)) for l in lines if l != None]
             objects = [o for o in objects if o != None]
+            eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+            for o in objects:
+                o.editgroup_id = eg.id
             self.api.create_creator_batch(objects)
+            self.api.accept_editgroup(eg.id)
             print("inserted {}".format(len(objects)))
```