diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-14 15:02:52 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-14 15:06:47 -0700 |
commit | 61caceebcc5cd04b28d9859b27ac314bb2a59bbb (patch) | |
tree | ee70241ade0fb769e33b0312873826d243740282 /python/fatcat | |
parent | ac0b49ee3e04d98ad5b6dd8c2360a71d7ecce1a3 (diff) | |
download | fatcat-61caceebcc5cd04b28d9859b27ac314bb2a59bbb.tar.gz fatcat-61caceebcc5cd04b28d9859b27ac314bb2a59bbb.zip |
add insert counting to importers
Diffstat (limited to 'python/fatcat')
-rw-r--r-- | python/fatcat/crossref_importer.py | 2 | ||||
-rw-r--r-- | python/fatcat/importer_common.py | 9 | ||||
-rw-r--r-- | python/fatcat/issn_importer.py | 2 | ||||
-rw-r--r-- | python/fatcat/orcid_importer.py | 2 |
4 files changed, 15 insertions, 0 deletions
```diff
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index a2e14ed1..2154c8c0 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -160,6 +160,7 @@ class FatcatCrossrefImporter(FatcatImporter):
  re.container_id = container.ident
  self._issnl_id_map[ce.issnl] = container.ident
  self.api.create_release(re, editgroup=editgroup)
+ self.insert_count = self.insert_count + 1
 
  def create_batch(self, batch, editgroup=None):
  """Current work/release pairing disallows batch creation of releases.
@@ -178,3 +179,4 @@ class FatcatCrossrefImporter(FatcatImporter):
  self._issnl_id_map[ce.issnl] = container.ident
  release_batch.append(re)
  self.api.create_release_batch(release_batch, autoaccept="true", editgroup=editgroup)
+ self.insert_count = self.insert_count + len(release_batch)
diff --git a/python/fatcat/importer_common.py b/python/fatcat/importer_common.py
index ff0c8a27..95bec8a1 100644
--- a/python/fatcat/importer_common.py
+++ b/python/fatcat/importer_common.py
@@ -26,6 +26,13 @@ class FatcatImporter:
  self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
  if issn_map_file:
  self.read_issn_map_file(issn_map_file)
+ self.processed_lines = 0
+ self.insert_count = 0
+ self.update_count = 0
+
+ def describe_run(self):
+ print("Processed {} lines, inserted {}, updated {}.".format(
+ self.processed_lines, self.insert_count, self.update_count))
 
  def process_source(self, source, group_size=100):
  """Creates and auto-accepts editgroup every group_size rows"""
@@ -37,12 +44,14 @@ class FatcatImporter:
  self.api.accept_editgroup(eg)
  eg = self.api.create_editgroup(
  fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
+ self.processed_lines = self.processed_lines + 1
  if i == 0 or (i % group_size) != 0:
  self.api.accept_editgroup(eg.id)
 
  def process_batch(self, source, size=50):
  """Reads and processes in batches (not API-call-per-)"""
  for rows in grouper(source, size):
+ self.processed_lines = self.processed_lines + len(rows)
  eg = self.api.create_editgroup(
  fatcat_client.Editgroup(editor_id='aaaaaaaaaaaabkvkaaaaaaaaae'))
  self.create_batch(rows, editgroup=eg.id)
diff --git a/python/fatcat/issn_importer.py b/python/fatcat/issn_importer.py
index ad2cad78..c9ef50b5 100644
--- a/python/fatcat/issn_importer.py
+++ b/python/fatcat/issn_importer.py
@@ -61,6 +61,7 @@ class FatcatIssnImporter(FatcatImporter):
  ce = self.parse_issn_row(row)
  if ce is not None:
  self.api.create_container(ce, editgroup=editgroup)
+ self.insert_count = self.insert_count + 1
 
  def create_batch(self, batch, editgroup=None):
  """Reads and processes in batches (not API-call-per-line)"""
@@ -68,3 +69,4 @@ class FatcatIssnImporter(FatcatImporter):
  for l in batch if l != None]
  objects = [o for o in objects if o != None]
  self.api.create_container_batch(objects, autoaccept="true", editgroup=editgroup)
+ self.insert_count = self.insert_count + len(objects)
diff --git a/python/fatcat/orcid_importer.py b/python/fatcat/orcid_importer.py
index 2eeac122..e1f5943c 100644
--- a/python/fatcat/orcid_importer.py
+++ b/python/fatcat/orcid_importer.py
@@ -62,6 +62,7 @@ class FatcatOrcidImporter(FatcatImporter):
  ce = self.parse_orcid_dict(obj)
  if ce is not None:
  self.api.create_creator(ce, editgroup=editgroup)
+ self.insert_count = self.insert_count + 1
 
  def create_batch(self, batch, editgroup=None):
  """Reads and processes in batches (not API-call-per-line)"""
@@ -69,3 +70,4 @@ class FatcatOrcidImporter(FatcatImporter):
  for l in batch if l != None]
  objects = [o for o in objects if o != None]
  self.api.create_creator_batch(objects, autoaccept="true", editgroup=editgroup)
+ self.insert_count = self.insert_count + len(objects)
```