about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-06-24 15:45:45 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-06-24 15:46:21 -0700
commit 11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6 (patch)
tree 68bae4b5fbef6cebccab6b9a5073540bdf1d6c0d
parent 5285d4d1e2cea5dc8a0c57cc5bbf81a65d19163e (diff)
download fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.tar.gz
fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.zip
add inflight edit protection to matched importer
-rw-r--r-- python/fatcat_tools/importers/matched.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index ed3cfb2f..a94584cd 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -156,6 +156,12 @@ class MatchedImporter(EntityImporter):
self.counts['exists'] += 1
return False
+ # check for edit conflicts
+ if existing.ident in [e.ident for e in self._edits_inflight]:
+ self.counts['skip-update-inflight'] += 1
+ return False
+
+
# minimum viable "existing" URL cleanup to fix dupes and broken links:
# remove 'None' wayback URLs, and set archive.org rel 'archive'
existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -191,7 +197,8 @@ class MatchedImporter(EntityImporter):
existing.md5 = existing.md5 or fe.md5
existing.sha1 = existing.sha1 or fe.sha1
existing.sha256 = existing.sha256 or fe.sha256
- self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+ edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+ self._edits_inflight.append(edit)
self.counts['update'] += 1
return False