diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 16:23:09 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 16:23:09 -0700 |
commit | a14d851ad230b3adb569ec6ca112cd4d9e638b2c (patch) | |
tree | 30d3ccea7dc43d53b8862899a7009b75b59cfb48 /python/fatcat_tools/importers | |
parent | bfc0d6597fa98e89615e6d82eed9488fd9a1e087 (diff) | |
download | fatcat-a14d851ad230b3adb569ec6ca112cd4d9e638b2c.tar.gz fatcat-a14d851ad230b3adb569ec6ca112cd4d9e638b2c.zip |
mechanism to not double-update entities
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 7 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py | 3 |
2 files changed, 9 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index c0311903..c4850592 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -149,6 +149,10 @@ class ArabesqueMatchImporter(EntityImporter):
             self.counts['skip-update-disabled'] += 1
             return False
 
+        if existing.ident in [e.ident for e in self._edits_inflight]:
+            self.counts['skip-update-inflight'] += 1
+            return False
+
         # TODO: this code path never gets hit because of the check above
         if set(fe.release_ids) == set(existing.release_ids):
             existing_urls = set([u.url for u in existing.urls])
@@ -162,7 +166,8 @@ class ArabesqueMatchImporter(EntityImporter):
         existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
         existing.release_ids = list(set(fe.release_ids + existing.release_ids))
         existing.mimetype = existing.mimetype or fe.mimetype
-        self.api.update_file(existing.ident, existing, editgroup_id=self.get_editgroup_id())
+        edit = self.api.update_file(existing.ident, existing, editgroup_id=self.get_editgroup_id())
+        self._edits_inflight.append(edit)
         self.counts['update'] += 1
         return False
 
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index dd30e198..49931542 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -171,6 +171,7 @@ class EntityImporter:
         self._edit_count = 0
         self._editgroup_id = None
         self._entity_queue = []
+        self._edits_inflight = []
 
     def push_record(self, raw_record):
         """
@@ -199,6 +200,7 @@ class EntityImporter:
             self.api.accept_editgroup(self._editgroup_id)
             self._editgroup_id = None
             self._edit_count = 0
+            self._edits_inflight = []
 
         if self._entity_queue:
             self.insert_batch(self._entity_queue)
@@ -215,6 +217,7 @@ class EntityImporter:
             self.api.accept_editgroup(self._editgroup_id)
             self._editgroup_id = None
             self._edit_count = 0
+            self._edits_inflight = []
 
         if not self._editgroup_id:
             eg = self.api.create_editgroup(
```