From e8a2925394f4cce0b8b4514f58d2bd19f9d7490b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 13 Nov 2018 12:43:12 -0800 Subject: use Counter object instead of per-metric ints --- python/TODO | 1 - python/fatcat_tools/importers/common.py | 12 ++++++------ python/fatcat_tools/importers/crossref.py | 4 ++-- python/fatcat_tools/importers/grobid_metadata.py | 2 +- python/fatcat_tools/importers/issn.py | 4 ++-- python/fatcat_tools/importers/matched.py | 8 ++++---- python/fatcat_tools/importers/orcid.py | 4 ++-- 7 files changed, 17 insertions(+), 18 deletions(-) (limited to 'python') diff --git a/python/TODO b/python/TODO index 46fceb69..8d9cffd3 100644 --- a/python/TODO +++ b/python/TODO @@ -3,7 +3,6 @@ Idea for further module simplification: move codegen'd library into it's own directory (with it's own README, tests, etc), and reference it here via symlink. -- use dict counter type (in python collections) instead of currently janky counters - schema.org metadata for releases additional tests diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 8dfee875..d289171d 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,8 @@ import sys import csv import json import itertools +from collections import Counter + import fatcat_client from fatcat_client.rest import ApiException @@ -26,13 +28,11 @@ class FatcatImporter: self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$") if issn_map_file: self.read_issn_map_file(issn_map_file) - self.processed_lines = 0 - self.insert_count = 0 - self.update_count = 0 + self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0}) def describe_run(self): print("Processed {} lines, inserted {}, updated {}.".format( - self.processed_lines, self.insert_count, self.update_count)) + self.counts['processed_lines'], self.counts['insert'], self.counts['update'])) def process_source(self, source, group_size=100): """Creates and auto-accepts editgroup every group_size rows""" @@ -44,14 +44,14 @@ class FatcatImporter: self.api.accept_editgroup(eg.id) eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) - self.processed_lines = self.processed_lines + 1 + self.counts['processed_lines'] += 1 if i == 0 or (i % group_size) != 0: self.api.accept_editgroup(eg.id) def process_batch(self, source, size=50): """Reads and processes in batches (not API-call-per-)""" for rows in grouper(source, size): - self.processed_lines = self.processed_lines + len(rows) + self.counts['processed_lines'] += len(rows) eg = self.api.create_editgroup( fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) self.create_batch(rows, editgroup=eg.id) diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index dddb58d1..01143551 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -247,7 +247,7 @@ class FatcatCrossrefImporter(FatcatImporter): re.container_id = container.ident self._issnl_id_map[ce.issnl] = container.ident self.api.create_release(re, editgroup=editgroup) - self.insert_count = self.insert_count + 1 + self.counts['insert'] += 1 def create_batch(self, batch, editgroup=None): """Current work/release pairing disallows batch creation of releases. @@ -269,4 +269,4 @@ class FatcatCrossrefImporter(FatcatImporter): self._issnl_id_map[ce.issnl] = container.ident release_batch.append(re) self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup) - self.insert_count = self.insert_count + len(release_batch) + self.counts['insert'] += len(release_batch) diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 56b2ee02..6d635479 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -163,6 +163,6 @@ class FatcatGrobidMetadataImporter(FatcatImporter): # created it fe.releases.append(release_entity.ident) file_entity = self.api.create_file(fe, editgroup=editgroup) - self.insert_count = self.insert_count + 1 + self.counts['insert'] += 1 # NB: batch mode not implemented diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py index d7fb9082..ba8492c6 100644 --- a/python/fatcat_tools/importers/issn.py +++ b/python/fatcat_tools/importers/issn.py @@ -61,7 +61,7 @@ class FatcatIssnImporter(FatcatImporter): ce = self.parse_issn_row(row) if ce is not None: self.api.create_container(ce, editgroup=editgroup) - self.insert_count = self.insert_count + 1 + self.counts['insert'] += 1 def create_batch(self, batch, editgroup=None): """Reads and processes in batches (not API-call-per-line)""" @@ -69,4 +69,4 @@ class FatcatIssnImporter(FatcatImporter): for l in batch if l != None] objects = [o for o in objects if o != None] self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup) - self.insert_count = self.insert_count + len(objects) + self.counts['insert'] += len(objects) diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 6270fe88..774019c7 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -125,10 +125,10 @@ class FatcatMatchedImporter(FatcatImporter): if fe is not None: if fe.ident is None: self.api.create_file(fe, editgroup=editgroup) - self.insert_count = self.insert_count + 1 + self.counts['insert'] += 1 else: self.api.update_file(fe.ident, fe, editgroup=editgroup) - self.update_count = self.update_count + 1 + self.counts['update'] += 1 def create_batch(self, batch, editgroup=None): """Reads and processes in batches (not API-call-per-line)""" @@ -140,5 +140,5 @@ class FatcatMatchedImporter(FatcatImporter): self.api.update_file(obj.ident, obj, editgroup=editgroup) if len(new_objects) > 0: self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup) - self.update_count = self.update_count + len(update_objects) - self.insert_count = self.insert_count + len(new_objects) + self.counts['update'] += len(update_objects) + self.counts['insert'] += len(new_objects) diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 350c4c57..527316dd 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -62,7 +62,7 @@ class FatcatOrcidImporter(FatcatImporter): ce = self.parse_orcid_dict(obj) if ce is not None: self.api.create_creator(ce, editgroup=editgroup) - self.insert_count = self.insert_count + 1 + self.counts['insert'] += 1 def create_batch(self, batch, editgroup=None): """Reads and processes in batches (not API-call-per-line)""" @@ -70,4 +70,4 @@ class FatcatOrcidImporter(FatcatImporter): for l in batch if l != None] objects = [o for o in objects if o != None] self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup) - self.insert_count = self.insert_count + len(objects) + self.counts['insert'] += len(objects) -- cgit v1.2.3