From 457c1612e29730e5b9ee2820c3ea813a1d2dd405 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 24 Dec 2018 16:04:53 -0800 Subject: python impl of API ident harmonization --- python/fatcat_tools/importers/common.py | 12 ++++++------ python/fatcat_tools/importers/crossref.py | 14 +++++++------- python/fatcat_tools/importers/grobid_metadata.py | 10 +++++----- python/fatcat_tools/importers/issn.py | 8 ++++---- python/fatcat_tools/importers/matched.py | 20 ++++++++++---------- python/fatcat_tools/importers/orcid.py | 8 ++++---- 6 files changed, 36 insertions(+), 36 deletions(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 65976a21..e31cabf8 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -54,11 +54,11 @@ class FatcatImporter: print("Processed {} lines, inserted {}, updated {}.".format( self.counts['processed_lines'], self.counts['insert'], self.counts['update'])) - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): # sub-classes expected to implement this raise NotImplementedError - def create_batch(self, rows, editgroup=None): + def create_batch(self, rows, editgroup_id=None): # sub-classes expected to implement this raise NotImplementedError @@ -68,14 +68,14 @@ class FatcatImporter: fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) i = 0 for i, row in enumerate(source): - self.create_row(row, editgroup=eg.id) + self.create_row(row, editgroup_id=eg.editgroup_id) if i > 0 and (i % group_size) == 0: - self.api.accept_editgroup(eg.id) + self.api.accept_editgroup(eg.editgroup_id) eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) self.counts['processed_lines'] += 1 if i == 0 or (i % group_size) != 0: - self.api.accept_editgroup(eg.id) + self.api.accept_editgroup(eg.editgroup_id) def process_batch(self, source, size=50, decode_kafka=False): """Reads and processes in batches (not API-call-per-)""" @@ -85,7 +85,7 @@ class FatcatImporter: self.counts['processed_lines'] += len(rows) eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) - self.create_batch(rows, editgroup=eg.id) + self.create_batch(rows, editgroup_id=eg.editgroup_id) def process_csv_source(self, source, group_size=100, delimiter=','): reader = csv.DictReader(source, delimiter=delimiter) diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 13179207..d4d0de68 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -287,7 +287,7 @@ class CrossrefImporter(FatcatImporter): extra=dict(crossref=extra)) return (re, ce) - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): if row is None: return obj = json.loads(row) @@ -295,13 +295,13 @@ class CrossrefImporter(FatcatImporter): if entities is not None: (re, ce) = entities if ce is not None: - container = self.api.create_container(ce, editgroup=editgroup) + container = self.api.create_container(ce, editgroup_id=editgroup_id) re.container_id = container.ident self._issnl_id_map[ce.issnl] = container.ident - self.api.create_release(re, editgroup=editgroup) + self.api.create_release(re, editgroup_id=editgroup_id) self.counts['insert'] += 1 - def create_batch(self, batch, editgroup=None): + def create_batch(self, batch, editgroup_id=None): """Current work/release pairing disallows batch creation of releases. Could do batch work creation and then match against releases, but meh.""" release_batch = [] @@ -315,10 +315,10 @@ class CrossrefImporter(FatcatImporter): if ce is not None: ce_eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) - container = self.api.create_container(ce, editgroup=ce_eg.id) - self.api.accept_editgroup(ce_eg.id) + container = self.api.create_container(ce, editgroup_id=ce_eg.editgroup_id) + self.api.accept_editgroup(ce_eg.editgroup_id) re.container_id = container.ident self._issnl_id_map[ce.issnl] = container.ident release_batch.append(re) - self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup) + self.api.create_release_batch(release_batch, autoaccept="true", editgroup_id=editgroup_id) self.counts['insert'] += len(release_batch) diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 47a753a6..2cb97b01 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -130,7 +130,7 @@ class GrobidMetadataImporter(FatcatImporter): sha1=sha1, size=int(file_size), mimetype=mimetype, - releases=[], + release_ids=[], urls=[], ) @@ -147,7 +147,7 @@ class GrobidMetadataImporter(FatcatImporter): return fe - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): if not row: return fields = row.split('\t') @@ -159,11 +159,11 @@ class GrobidMetadataImporter(FatcatImporter): fe = self.parse_file_metadata(sha1_key, cdx, mimetype, file_size) re = self.parse_grobid_json(grobid_meta) if fe and re: - release_entity = self.api.create_release(re, editgroup=editgroup) + release_entity = self.api.create_release(re, editgroup_id=editgroup_id) # release ident can't already be in release list because we just # created it - fe.releases.append(release_entity.ident) - file_entity = self.api.create_file(fe, editgroup=editgroup) + fe.release_ids.append(release_entity.ident) + file_entity = self.api.create_file(fe, editgroup_id=editgroup_id) self.counts['insert'] += 1 # NB: batch mode not implemented diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py index f702dc60..9b9ca63f 100644 --- a/python/fatcat_tools/importers/issn.py +++ b/python/fatcat_tools/importers/issn.py @@ -64,16 +64,16 @@ class IssnImporter(FatcatImporter): extra=extra) return ce - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): ce = self.parse_issn_row(row) if ce is not None: - self.api.create_container(ce, editgroup=editgroup) + self.api.create_container(ce, editgroup_id=editgroup_id) self.counts['insert'] += 1 - def create_batch(self, batch, editgroup=None): + def create_batch(self, batch, editgroup_id=None): """Reads and processes in batches (not API-call-per-line)""" objects = [self.parse_issn_row(l) for l in batch if (l is not None)] objects = [o for o in objects if (o is not None)] - self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup) + self.api.create_container_batch(objects, autoaccept="true", editgroup_id=editgroup_id) self.counts['insert'] += len(objects) diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 6f83dd23..5dbda27c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -70,7 +70,7 @@ class MatchedImporter(FatcatImporter): if fe is None: fe = fatcat_client.FileEntity( sha1=sha1, - releases=[], + release_ids=[], urls=[], ) @@ -89,10 +89,10 @@ class MatchedImporter(FatcatImporter): re_list.add(re.ident) if len(re_list) == 0: return None - if fe.releases == set(re_list): + if fe.release_ids == set(re_list): return None - re_list.update(fe.releases) - fe.releases = list(re_list) + re_list.update(fe.release_ids) + fe.release_ids = list(re_list) # parse URLs and CDX existing_urls = [feu.url for feu in fe.urls] @@ -125,26 +125,26 @@ class MatchedImporter(FatcatImporter): fe.mimetype = obj.get('mimetype') return fe - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): obj = json.loads(row) fe = self.parse_matched_dict(obj) if fe is not None: if fe.ident is None: - self.api.create_file(fe, editgroup=editgroup) + self.api.create_file(fe, editgroup_id=editgroup_id) self.counts['insert'] += 1 else: - self.api.update_file(fe.ident, fe, editgroup=editgroup) + self.api.update_file(fe.ident, fe, editgroup_id=editgroup_id) self.counts['update'] += 1 - def create_batch(self, batch, editgroup=None): + def create_batch(self, batch, editgroup_id=None): """Reads and processes in batches (not API-call-per-line)""" objects = [self.parse_matched_dict(json.loads(l)) for l in batch if l != None] new_objects = [o for o in objects if o != None and o.ident == None] update_objects = [o for o in objects if o != None and o.ident != None] for obj in update_objects: - self.api.update_file(obj.ident, obj, editgroup=editgroup) + self.api.update_file(obj.ident, obj, editgroup_id=editgroup_id) if len(new_objects) > 0: - self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup) + self.api.create_file_batch(new_objects, autoaccept="true", editgroup_id=editgroup_id) self.counts['update'] += len(update_objects) self.counts['insert'] += len(new_objects) diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 9e4767f9..fc4562d0 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -57,17 +57,17 @@ class OrcidImporter(FatcatImporter): extra=extra) return ce - def create_row(self, row, editgroup=None): + def create_row(self, row, editgroup_id=None): obj = json.loads(row) ce = self.parse_orcid_dict(obj) if ce is not None: - self.api.create_creator(ce, editgroup=editgroup) + self.api.create_creator(ce, editgroup_id=editgroup_id) self.counts['insert'] += 1 - def create_batch(self, batch, editgroup=None): + def create_batch(self, batch, editgroup_id=None): """Reads and processes in batches (not API-call-per-line)""" objects = [self.parse_orcid_dict(json.loads(l)) for l in batch if l != None] objects = [o for o in objects if o != None] - self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup) + self.api.create_creator_batch(objects, autoaccept="true", editgroup_id=editgroup_id) self.counts['insert'] += len(objects) -- cgit v1.2.3