diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 12:43:12 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-13 12:43:12 -0800 | 
| commit | e8a2925394f4cce0b8b4514f58d2bd19f9d7490b (patch) | |
| tree | c060f01ac5e3e63d08a28cf38d0ade55267fc893 /python | |
| parent | 572fdc7caf74d9539e642e97855d8c8ba94ff93a (diff) | |
| download | fatcat-e8a2925394f4cce0b8b4514f58d2bd19f9d7490b.tar.gz fatcat-e8a2925394f4cce0b8b4514f58d2bd19f9d7490b.zip | |
use Counter object instead of per-metric ints
Diffstat (limited to 'python')
| -rw-r--r-- | python/TODO | 1 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 12 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/issn.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 8 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/orcid.py | 4 | 
7 files changed, 17 insertions, 18 deletions
| diff --git a/python/TODO b/python/TODO index 46fceb69..8d9cffd3 100644 --- a/python/TODO +++ b/python/TODO @@ -3,7 +3,6 @@ Idea for further module simplification: move codegen'd library into it's own  directory (with it's own README, tests, etc), and reference it here via  symlink. -- use dict counter type (in python collections) instead of currently janky counters  - schema.org metadata for releases  additional tests diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 8dfee875..d289171d 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -4,6 +4,8 @@ import sys  import csv  import json  import itertools +from collections import Counter +  import fatcat_client  from fatcat_client.rest import ApiException @@ -26,13 +28,11 @@ class FatcatImporter:          self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")          if issn_map_file:              self.read_issn_map_file(issn_map_file) -        self.processed_lines = 0 -        self.insert_count = 0 -        self.update_count = 0 +        self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0})      def describe_run(self):          print("Processed {} lines, inserted {}, updated {}.".format( -            self.processed_lines, self.insert_count, self.update_count)) +            self.counts['processed_lines'], self.counts['insert'], self.counts['update']))      def process_source(self, source, group_size=100):          """Creates and auto-accepts editgroup every group_size rows""" @@ -44,14 +44,14 @@ class FatcatImporter:                  self.api.accept_editgroup(eg.id)                  eg = self.api.create_editgroup(                      fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae')) -            self.processed_lines = self.processed_lines + 1 +            self.counts['processed_lines'] += 1          if i == 0 or (i % group_size) != 0:              self.api.accept_editgroup(eg.id)      def process_batch(self, source, size=50):          """Reads and processes in batches (not API-call-per-)"""          for rows in grouper(source, size): -            self.processed_lines = self.processed_lines + len(rows) +            self.counts['processed_lines'] += len(rows)              eg = self.api.create_editgroup(                  fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))              self.create_batch(rows, editgroup=eg.id) diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index dddb58d1..01143551 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -247,7 +247,7 @@ class FatcatCrossrefImporter(FatcatImporter):                  re.container_id = container.ident                  self._issnl_id_map[ce.issnl] = container.ident              self.api.create_release(re, editgroup=editgroup) -            self.insert_count = self.insert_count + 1 +            self.counts['insert'] += 1      def create_batch(self, batch, editgroup=None):          """Current work/release pairing disallows batch creation of releases. @@ -269,4 +269,4 @@ class FatcatCrossrefImporter(FatcatImporter):                      self._issnl_id_map[ce.issnl] = container.ident                  release_batch.append(re)          self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup) -        self.insert_count = self.insert_count + len(release_batch) +        self.counts['insert'] += len(release_batch) diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 56b2ee02..6d635479 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -163,6 +163,6 @@ class FatcatGrobidMetadataImporter(FatcatImporter):              # created it              fe.releases.append(release_entity.ident)              file_entity = self.api.create_file(fe, editgroup=editgroup) -            self.insert_count = self.insert_count + 1 +            self.counts['insert'] += 1      # NB: batch mode not implemented diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py index d7fb9082..ba8492c6 100644 --- a/python/fatcat_tools/importers/issn.py +++ b/python/fatcat_tools/importers/issn.py @@ -61,7 +61,7 @@ class FatcatIssnImporter(FatcatImporter):          ce = self.parse_issn_row(row)          if ce is not None:              self.api.create_container(ce, editgroup=editgroup) -            self.insert_count = self.insert_count + 1 +            self.counts['insert'] += 1      def create_batch(self, batch, editgroup=None):          """Reads and processes in batches (not API-call-per-line)""" @@ -69,4 +69,4 @@ class FatcatIssnImporter(FatcatImporter):                     for l in batch if l != None]          objects = [o for o in objects if o != None]          self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup) -        self.insert_count = self.insert_count + len(objects) +        self.counts['insert'] += len(objects) diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 6270fe88..774019c7 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -125,10 +125,10 @@ class FatcatMatchedImporter(FatcatImporter):          if fe is not None:              if fe.ident is None:                  self.api.create_file(fe, editgroup=editgroup) -                self.insert_count = self.insert_count + 1 +                self.counts['insert'] += 1              else:                  self.api.update_file(fe.ident, fe, editgroup=editgroup) -                self.update_count = self.update_count + 1 +                self.counts['update'] += 1      def create_batch(self, batch, editgroup=None):          """Reads and processes in batches (not API-call-per-line)""" @@ -140,5 +140,5 @@ class FatcatMatchedImporter(FatcatImporter):              self.api.update_file(obj.ident, obj, editgroup=editgroup)          if len(new_objects) > 0:              self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup) -        self.update_count = self.update_count + len(update_objects) -        self.insert_count = self.insert_count + len(new_objects) +        self.counts['update'] += len(update_objects) +        self.counts['insert'] += len(new_objects) diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 350c4c57..527316dd 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -62,7 +62,7 @@ class FatcatOrcidImporter(FatcatImporter):          ce = self.parse_orcid_dict(obj)          if ce is not None:              self.api.create_creator(ce, editgroup=editgroup) -            self.insert_count = self.insert_count + 1 +            self.counts['insert'] += 1      def create_batch(self, batch, editgroup=None):          """Reads and processes in batches (not API-call-per-line)""" @@ -70,4 +70,4 @@ class FatcatOrcidImporter(FatcatImporter):                     for l in batch if l != None]          objects = [o for o in objects if o != None]          self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup) -        self.insert_count = self.insert_count + len(objects) +        self.counts['insert'] += len(objects) | 
