diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 12 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 14 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 10 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/issn.py | 8 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 20 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/orcid.py | 8 | 
6 files changed, 36 insertions, 36 deletions
| diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 65976a21..e31cabf8 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -54,11 +54,11 @@ class FatcatImporter:          print("Processed {} lines, inserted {}, updated {}.".format(              self.counts['processed_lines'], self.counts['insert'], self.counts['update'])) -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          # sub-classes expected to implement this          raise NotImplementedError -    def create_batch(self, rows, editgroup=None): +    def create_batch(self, rows, editgroup_id=None):          # sub-classes expected to implement this          raise NotImplementedError @@ -68,14 +68,14 @@ class FatcatImporter:              fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))          i = 0          for i, row in enumerate(source): -            self.create_row(row, editgroup=eg.id) +            self.create_row(row, editgroup_id=eg.editgroup_id)              if i > 0 and (i % group_size) == 0: -                self.api.accept_editgroup(eg.id) +                self.api.accept_editgroup(eg.editgroup_id)                  eg = self.api.create_editgroup(                      fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))              self.counts['processed_lines'] += 1          if i == 0 or (i % group_size) != 0: -            self.api.accept_editgroup(eg.id) +            self.api.accept_editgroup(eg.editgroup_id)      def process_batch(self, source, size=50, decode_kafka=False):          """Reads and processes in batches (not API-call-per-)""" @@ -85,7 +85,7 @@ class FatcatImporter:              self.counts['processed_lines'] += len(rows)              eg = self.api.create_editgroup(                  fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) -            self.create_batch(rows, editgroup=eg.id) +            self.create_batch(rows, editgroup_id=eg.editgroup_id)      def process_csv_source(self, source, group_size=100, delimiter=','):          reader = csv.DictReader(source, delimiter=delimiter) diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 13179207..d4d0de68 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -287,7 +287,7 @@ class CrossrefImporter(FatcatImporter):              extra=dict(crossref=extra))          return (re, ce) -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          if row is None:              return          obj = json.loads(row) @@ -295,13 +295,13 @@ class CrossrefImporter(FatcatImporter):          if entities is not None:              (re, ce) = entities              if ce is not None: -                container = self.api.create_container(ce, editgroup=editgroup) +                container = self.api.create_container(ce, editgroup_id=editgroup_id)                  re.container_id = container.ident                  self._issnl_id_map[ce.issnl] = container.ident -            self.api.create_release(re, editgroup=editgroup) +            self.api.create_release(re, editgroup_id=editgroup_id)              self.counts['insert'] += 1 -    def create_batch(self, batch, editgroup=None): +    def create_batch(self, batch, editgroup_id=None):          """Current work/release pairing disallows batch creation of releases.          Could do batch work creation and then match against releases, but meh."""          release_batch = [] @@ -315,10 +315,10 @@ class CrossrefImporter(FatcatImporter):                  if ce is not None:                      ce_eg = self.api.create_editgroup(                          fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) -                    container = self.api.create_container(ce, editgroup=ce_eg.id) -                    self.api.accept_editgroup(ce_eg.id) +                    container = self.api.create_container(ce, editgroup_id=ce_eg.editgroup_id) +                    self.api.accept_editgroup(ce_eg.editgroup_id)                      re.container_id = container.ident                      self._issnl_id_map[ce.issnl] = container.ident                  release_batch.append(re) -        self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup) +        self.api.create_release_batch(release_batch, autoaccept="true", editgroup_id=editgroup_id)          self.counts['insert'] += len(release_batch) diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 47a753a6..2cb97b01 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -130,7 +130,7 @@ class GrobidMetadataImporter(FatcatImporter):              sha1=sha1,              size=int(file_size),              mimetype=mimetype, -            releases=[], +            release_ids=[],              urls=[],          ) @@ -147,7 +147,7 @@ class GrobidMetadataImporter(FatcatImporter):          return fe -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          if not row:              return          fields = row.split('\t') @@ -159,11 +159,11 @@ class GrobidMetadataImporter(FatcatImporter):          fe = self.parse_file_metadata(sha1_key, cdx, mimetype, file_size)          re = self.parse_grobid_json(grobid_meta)          if fe and re: -            release_entity = self.api.create_release(re, editgroup=editgroup) +            release_entity = self.api.create_release(re, editgroup_id=editgroup_id)              # release ident can't already be in release list because we just              # created it -            fe.releases.append(release_entity.ident) -            file_entity = self.api.create_file(fe, editgroup=editgroup) +            fe.release_ids.append(release_entity.ident) +            file_entity = self.api.create_file(fe, editgroup_id=editgroup_id)              self.counts['insert'] += 1      # NB: batch mode not implemented diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py index f702dc60..9b9ca63f 100644 --- a/python/fatcat_tools/importers/issn.py +++ b/python/fatcat_tools/importers/issn.py @@ -64,16 +64,16 @@ class IssnImporter(FatcatImporter):              extra=extra)          return ce -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          ce = self.parse_issn_row(row)          if ce is not None: -            self.api.create_container(ce, editgroup=editgroup) +            self.api.create_container(ce, editgroup_id=editgroup_id)              self.counts['insert'] += 1 -    def create_batch(self, batch, editgroup=None): +    def create_batch(self, batch, editgroup_id=None):          """Reads and processes in batches (not API-call-per-line)"""          objects = [self.parse_issn_row(l)                     for l in batch if (l is not None)]          objects = [o for o in objects if (o is not None)] -        self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup) +        self.api.create_container_batch(objects, autoaccept="true", editgroup_id=editgroup_id)          self.counts['insert'] += len(objects) diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 6f83dd23..5dbda27c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -70,7 +70,7 @@ class MatchedImporter(FatcatImporter):          if fe is None:              fe = fatcat_client.FileEntity(                  sha1=sha1, -                releases=[], +                release_ids=[],                  urls=[],              ) @@ -89,10 +89,10 @@ class MatchedImporter(FatcatImporter):                  re_list.add(re.ident)          if len(re_list) == 0:              return None -        if fe.releases == set(re_list): +        if fe.release_ids == set(re_list):              return None -        re_list.update(fe.releases) -        fe.releases = list(re_list) +        re_list.update(fe.release_ids) +        fe.release_ids = list(re_list)          # parse URLs and CDX          existing_urls = [feu.url for feu in fe.urls] @@ -125,26 +125,26 @@ class MatchedImporter(FatcatImporter):              fe.mimetype = obj.get('mimetype')          return fe -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          obj = json.loads(row)          fe = self.parse_matched_dict(obj)          if fe is not None:              if fe.ident is None: -                self.api.create_file(fe, editgroup=editgroup) +                self.api.create_file(fe, editgroup_id=editgroup_id)                  self.counts['insert'] += 1              else: -                self.api.update_file(fe.ident, fe, editgroup=editgroup) +                self.api.update_file(fe.ident, fe, editgroup_id=editgroup_id)                  self.counts['update'] += 1 -    def create_batch(self, batch, editgroup=None): +    def create_batch(self, batch, editgroup_id=None):          """Reads and processes in batches (not API-call-per-line)"""          objects = [self.parse_matched_dict(json.loads(l))                     for l in batch if l != None]          new_objects = [o for o in objects if o != None and o.ident == None]          update_objects = [o for o in objects if o != None and o.ident != None]          for obj in update_objects: -            self.api.update_file(obj.ident, obj, editgroup=editgroup) +            self.api.update_file(obj.ident, obj, editgroup_id=editgroup_id)          if len(new_objects) > 0: -            self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup) +            self.api.create_file_batch(new_objects, autoaccept="true", editgroup_id=editgroup_id)          self.counts['update'] += len(update_objects)          self.counts['insert'] += len(new_objects) diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 9e4767f9..fc4562d0 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -57,17 +57,17 @@ class OrcidImporter(FatcatImporter):              extra=extra)          return ce -    def create_row(self, row, editgroup=None): +    def create_row(self, row, editgroup_id=None):          obj = json.loads(row)          ce = self.parse_orcid_dict(obj)          if ce is not None: -            self.api.create_creator(ce, editgroup=editgroup) +            self.api.create_creator(ce, editgroup_id=editgroup_id)              self.counts['insert'] += 1 -    def create_batch(self, batch, editgroup=None): +    def create_batch(self, batch, editgroup_id=None):          """Reads and processes in batches (not API-call-per-line)"""          objects = [self.parse_orcid_dict(json.loads(l))                     for l in batch if l != None]          objects = [o for o in objects if o != None] -        self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup) +        self.api.create_creator_batch(objects, autoaccept="true", editgroup_id=editgroup_id)          self.counts['insert'] += len(objects) | 
