summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-11-13 12:43:12 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2018-11-13 12:43:12 -0800
commit e8a2925394f4cce0b8b4514f58d2bd19f9d7490b (patch)
tree c060f01ac5e3e63d08a28cf38d0ade55267fc893 /python
parent 572fdc7caf74d9539e642e97855d8c8ba94ff93a (diff)
download fatcat-e8a2925394f4cce0b8b4514f58d2bd19f9d7490b.tar.gz
fatcat-e8a2925394f4cce0b8b4514f58d2bd19f9d7490b.zip
use Counter object instead of per-metric ints
Diffstat (limited to 'python')
-rw-r--r-- python/TODO | 1
-rw-r--r-- python/fatcat_tools/importers/common.py | 12
-rw-r--r-- python/fatcat_tools/importers/crossref.py | 4
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py | 2
-rw-r--r-- python/fatcat_tools/importers/issn.py | 4
-rw-r--r-- python/fatcat_tools/importers/matched.py | 8
-rw-r--r-- python/fatcat_tools/importers/orcid.py | 4
7 files changed, 17 insertions, 18 deletions
diff --git a/python/TODO b/python/TODO
index 46fceb69..8d9cffd3 100644
--- a/python/TODO
+++ b/python/TODO
@@ -3,7 +3,6 @@ Idea for further module simplification: move codegen'd library into it's own
directory (with it's own README, tests, etc), and reference it here via
symlink.
-- use dict counter type (in python collections) instead of currently janky counters
- schema.org metadata for releases
additional tests
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 8dfee875..d289171d 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -4,6 +4,8 @@ import sys
import csv
import json
import itertools
+from collections import Counter
+
import fatcat_client
from fatcat_client.rest import ApiException
@@ -26,13 +28,11 @@ class FatcatImporter:
self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
if issn_map_file:
self.read_issn_map_file(issn_map_file)
- self.processed_lines = 0
- self.insert_count = 0
- self.update_count = 0
+ self.counts = Counter({'insert': 0, 'update': 0, 'processed_lines': 0})
def describe_run(self):
print("Processed {} lines, inserted {}, updated {}.".format(
- self.processed_lines, self.insert_count, self.update_count))
+ self.counts['processed_lines'], self.counts['insert'], self.counts['update']))
def process_source(self, source, group_size=100):
"""Creates and auto-accepts editgroup every group_size rows"""
@@ -44,14 +44,14 @@ class FatcatImporter:
self.api.accept_editgroup(eg.id)
eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
- self.processed_lines = self.processed_lines + 1
+ self.counts['processed_lines'] += 1
if i == 0 or (i % group_size) != 0:
self.api.accept_editgroup(eg.id)
def process_batch(self, source, size=50):
"""Reads and processes in batches (not API-call-per-)"""
for rows in grouper(source, size):
- self.processed_lines = self.processed_lines + len(rows)
+ self.counts['processed_lines'] += len(rows)
eg = self.api.create_editgroup(
fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
self.create_batch(rows, editgroup=eg.id)
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index dddb58d1..01143551 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -247,7 +247,7 @@ class FatcatCrossrefImporter(FatcatImporter):
re.container_id = container.ident
self._issnl_id_map[ce.issnl] = container.ident
self.api.create_release(re, editgroup=editgroup)
- self.insert_count = self.insert_count + 1
+ self.counts['insert'] += 1
def create_batch(self, batch, editgroup=None):
"""Current work/release pairing disallows batch creation of releases.
@@ -269,4 +269,4 @@ class FatcatCrossrefImporter(FatcatImporter):
self._issnl_id_map[ce.issnl] = container.ident
release_batch.append(re)
self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup)
- self.insert_count = self.insert_count + len(release_batch)
+ self.counts['insert'] += len(release_batch)
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 56b2ee02..6d635479 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -163,6 +163,6 @@ class FatcatGrobidMetadataImporter(FatcatImporter):
# created it
fe.releases.append(release_entity.ident)
file_entity = self.api.create_file(fe, editgroup=editgroup)
- self.insert_count = self.insert_count + 1
+ self.counts['insert'] += 1
# NB: batch mode not implemented
diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py
index d7fb9082..ba8492c6 100644
--- a/python/fatcat_tools/importers/issn.py
+++ b/python/fatcat_tools/importers/issn.py
@@ -61,7 +61,7 @@ class FatcatIssnImporter(FatcatImporter):
ce = self.parse_issn_row(row)
if ce is not None:
self.api.create_container(ce, editgroup=editgroup)
- self.insert_count = self.insert_count + 1
+ self.counts['insert'] += 1
def create_batch(self, batch, editgroup=None):
"""Reads and processes in batches (not API-call-per-line)"""
@@ -69,4 +69,4 @@ class FatcatIssnImporter(FatcatImporter):
for l in batch if l != None]
objects = [o for o in objects if o != None]
self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup)
- self.insert_count = self.insert_count + len(objects)
+ self.counts['insert'] += len(objects)
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 6270fe88..774019c7 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -125,10 +125,10 @@ class FatcatMatchedImporter(FatcatImporter):
if fe is not None:
if fe.ident is None:
self.api.create_file(fe, editgroup=editgroup)
- self.insert_count = self.insert_count + 1
+ self.counts['insert'] += 1
else:
self.api.update_file(fe.ident, fe, editgroup=editgroup)
- self.update_count = self.update_count + 1
+ self.counts['update'] += 1
def create_batch(self, batch, editgroup=None):
"""Reads and processes in batches (not API-call-per-line)"""
@@ -140,5 +140,5 @@ class FatcatMatchedImporter(FatcatImporter):
self.api.update_file(obj.ident, obj, editgroup=editgroup)
if len(new_objects) > 0:
self.api.create_file_batch(new_objects, autoaccept="true", editgroup=editgroup)
- self.update_count = self.update_count + len(update_objects)
- self.insert_count = self.insert_count + len(new_objects)
+ self.counts['update'] += len(update_objects)
+ self.counts['insert'] += len(new_objects)
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 350c4c57..527316dd 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -62,7 +62,7 @@ class FatcatOrcidImporter(FatcatImporter):
ce = self.parse_orcid_dict(obj)
if ce is not None:
self.api.create_creator(ce, editgroup=editgroup)
- self.insert_count = self.insert_count + 1
+ self.counts['insert'] += 1
def create_batch(self, batch, editgroup=None):
"""Reads and processes in batches (not API-call-per-line)"""
@@ -70,4 +70,4 @@ class FatcatOrcidImporter(FatcatImporter):
for l in batch if l != None]
objects = [o for o in objects if o != None]
self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
- self.insert_count = self.insert_count + len(objects)
+ self.counts['insert'] += len(objects)