diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-24 15:45:45 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-24 15:46:21 -0700 |
commit | 11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6 (patch) | |
tree | 68bae4b5fbef6cebccab6b9a5073540bdf1d6c0d /python/fatcat_tools/importers | |
parent | 5285d4d1e2cea5dc8a0c57cc5bbf81a65d19163e (diff) | |
download | fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.tar.gz fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.zip |
add inflight edit protection to matched importer
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index ed3cfb2f..a94584cd 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -156,6 +156,12 @@ class MatchedImporter(EntityImporter):
             self.counts['exists'] += 1
             return False
 
+        # check for edit conflicts
+        if existing.ident in [e.ident for e in self._edits_inflight]:
+            self.counts['skip-update-inflight'] += 1
+            return False
+
+
         # minimum viable "existing" URL cleanup to fix dupes and broken links:
         # remove 'None' wayback URLs, and set archive.org rel 'archive'
         existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -191,7 +197,8 @@ class MatchedImporter(EntityImporter):
         existing.md5 = existing.md5 or fe.md5
         existing.sha1 = existing.sha1 or fe.sha1
         existing.sha256 = existing.sha256 or fe.sha256
-        self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        self._edits_inflight.append(edit)
         self.counts['update'] += 1
         return False
 