summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-04-18 16:23:09 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-04-18 16:23:09 -0700
commit: a14d851ad230b3adb569ec6ca112cd4d9e638b2c (patch)
tree: 30d3ccea7dc43d53b8862899a7009b75b59cfb48 /python/fatcat_tools
parent: bfc0d6597fa98e89615e6d82eed9488fd9a1e087 (diff)
download: fatcat-a14d851ad230b3adb569ec6ca112cd4d9e638b2c.tar.gz
download: fatcat-a14d851ad230b3adb569ec6ca112cd4d9e638b2c.zip
mechanism to not double-update entities
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/arabesque.py 7
-rw-r--r-- python/fatcat_tools/importers/common.py 3
2 files changed, 9 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index c0311903..c4850592 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -149,6 +149,10 @@ class ArabesqueMatchImporter(EntityImporter):
self.counts['skip-update-disabled'] += 1
return False
+ if existing.ident in [e.ident for e in self._edits_inflight]:
+ self.counts['skip-update-inflight'] += 1
+ return False
+
# TODO: this code path never gets hit because of the check above
if set(fe.release_ids) == set(existing.release_ids):
existing_urls = set([u.url for u in existing.urls])
@@ -162,7 +166,8 @@ class ArabesqueMatchImporter(EntityImporter):
existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
existing.release_ids = list(set(fe.release_ids + existing.release_ids))
existing.mimetype = existing.mimetype or fe.mimetype
- self.api.update_file(existing.ident, existing, editgroup_id=self.get_editgroup_id())
+ edit = self.api.update_file(existing.ident, existing, editgroup_id=self.get_editgroup_id())
+ self._edits_inflight.append(edit)
self.counts['update'] += 1
return False
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index dd30e198..49931542 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -171,6 +171,7 @@ class EntityImporter:
self._edit_count = 0
self._editgroup_id = None
self._entity_queue = []
+ self._edits_inflight = []
def push_record(self, raw_record):
"""
@@ -199,6 +200,7 @@ class EntityImporter:
self.api.accept_editgroup(self._editgroup_id)
self._editgroup_id = None
self._edit_count = 0
+ self._edits_inflight = []
if self._entity_queue:
self.insert_batch(self._entity_queue)
@@ -215,6 +217,7 @@ class EntityImporter:
self.api.accept_editgroup(self._editgroup_id)
self._editgroup_id = None
self._edit_count = 0
+ self._edits_inflight = []
if not self._editgroup_id:
eg = self.api.create_editgroup(