From 11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 24 Jun 2019 15:45:45 -0700
Subject: add inflight edit protection to matched importer

---
 python/fatcat_tools/importers/matched.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index ed3cfb2f..a94584cd 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -156,6 +156,12 @@ class MatchedImporter(EntityImporter):
             self.counts['exists'] += 1
             return False
 
+        # check for edit conflicts
+        if existing.ident in [e.ident for e in self._edits_inflight]:
+            self.counts['skip-update-inflight'] += 1
+            return False
+
+
         # minimum viable "existing" URL cleanup to fix dupes and broken links:
         # remove 'None' wayback URLs, and set archive.org rel 'archive'
         existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -191,7 +197,8 @@ class MatchedImporter(EntityImporter):
         existing.md5 = existing.md5 or fe.md5
         existing.sha1 = existing.sha1 or fe.sha1
         existing.sha256 = existing.sha256 or fe.sha256
-        self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        self._edits_inflight.append(edit)
         self.counts['update'] += 1
         return False
 
-- 
cgit v1.2.3