diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-24 15:45:45 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-24 15:46:21 -0700 | 
| commit | 11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6 (patch) | |
| tree | 68bae4b5fbef6cebccab6b9a5073540bdf1d6c0d /python/fatcat_tools/importers | |
| parent | 5285d4d1e2cea5dc8a0c57cc5bbf81a65d19163e (diff) | |
| download | fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.tar.gz fatcat-11ead5cdc1df5f8e356f7c3b3ec274e3f382b5c6.zip | |
add inflight edit protection to matched importer
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 9 | 
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index ed3cfb2f..a94584cd 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -156,6 +156,12 @@ class MatchedImporter(EntityImporter):
             self.counts['exists'] += 1
             return False
 
+        # check for edit conflicts
+        if existing.ident in [e.ident for e in self._edits_inflight]:
+            self.counts['skip-update-inflight'] += 1
+            return False
+
+
         # minimum viable "existing" URL cleanup to fix dupes and broken links:
         # remove 'None' wayback URLs, and set archive.org rel 'archive'
         existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -191,7 +197,8 @@ class MatchedImporter(EntityImporter):
         existing.md5 = existing.md5 or fe.md5
         existing.sha1 = existing.sha1 or fe.sha1
         existing.sha256 = existing.sha256 or fe.sha256
-        self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
+        self._edits_inflight.append(edit)
         self.counts['update'] += 1
         return False
```
