summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat/orcid_importer.py 21
1 files changed, 17 insertions, 4 deletions
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 02681b0a..ba8d0bd7 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -47,15 +47,24 @@ class FatcatOrcidImporter:
extra=extra)
return ce
- def process_line(self, line):
+ def process_line(self, line, editgroup_id=None):
+ """Doesn't accept the editgroup"""
obj = json.loads(line)
ce = self.parse_orcid_dict(obj)
if ce is not None:
+ ce.editgroup_id = editgroup_id
self.api.create_creator(ce)
- def process_source(self, source):
- for line in source:
- self.process_line(line)
+ def process_source(self, source, group_size=100):
+ """Creates and auto-accepts editgroup every group_size lines"""
+ eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+ for i, line in enumerate(source):
+ self.process_line(line, editgroup_id=eg.id)
+ if i > 0 and (i % group_size) == 0:
+ self.api.accept_editgroup(eg)
+ eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+ if i == 0 or (i % group_size) != 0:
+ self.api.accept_editgroup(eg.id)
def process_batch(self, source, size=50):
"""Reads and processes in batches (not API-call-per-line)"""
@@ -63,5 +72,9 @@ class FatcatOrcidImporter:
objects = [self.parse_orcid_dict(json.loads(l))
for l in lines if l != None]
objects = [o for o in objects if o != None]
+ eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+ for o in objects:
+ o.editgroup_id = eg.id
self.api.create_creator_batch(objects)
+ self.api.accept_editgroup(eg.id)
print("inserted {}".format(len(objects)))