diff options
| -rw-r--r-- | python/fatcat/orcid_importer.py | 21 | 
1 files changed, 17 insertions, 4 deletions
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 02681b0a..ba8d0bd7 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -47,15 +47,24 @@ class FatcatOrcidImporter:
             extra=extra)
         return ce
 
-    def process_line(self, line):
+    def process_line(self, line, editgroup_id=None):
+        """Doesn't accept the editgroup"""
         obj = json.loads(line)
         ce = self.parse_orcid_dict(obj)
         if ce is not None:
+            ce.editgroup_id = editgroup_id
             self.api.create_creator(ce)
 
-    def process_source(self, source):
-        for line in source:
-            self.process_line(line)
+    def process_source(self, source, group_size=100):
+        """Creates and auto-accepts editgropu every group_size lines"""
+        eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+        for i, line in enumerate(source):
+            self.process_line(line, editgroup_id=eg.id)
+            if i > 0 and (i % group_size) == 0:
+                self.api.accept_editgroup(eg)
+                eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+        if i == 0 or (i % group_size) != 0:
+            self.api.accept_editgroup(eg.id)
 
     def process_batch(self, source, size=50):
         """Reads and processes in batches (not API-call-per-line)"""
@@ -63,5 +72,9 @@ class FatcatOrcidImporter:
             objects = [self.parse_orcid_dict(json.loads(l))
                        for l in lines if l != None]
             objects = [o for o in objects if o != None]
+            eg = self.api.create_editgroup(fatcat_client.Editgroup(editor_id=1))
+            for o in objects:
+                o.editgroup_id = eg.id
             self.api.create_creator_batch(objects)
+            self.api.accept_editgroup(eg.id)
             print("inserted {}".format(len(objects)))
