diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-13 19:06:24 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-13 19:06:26 -0800 | 
| commit | 037a77d507dc32feb7ac2efd8a27cc2c36761bc2 (patch) | |
| tree | bc28ba8fff82e0d3d2de1e88a307c551781934ac /python/fatcat_tools/importers/ingest.py | |
| parent | 37c0b3354eb0077ec00cb3a1e5ed1760566649b7 (diff) | |
| download | fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.tar.gz fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.zip | |
add ingest import file collision protection
The common case is the same URL being submitted repeatedly during
testing.
This is only within-editgroup, and per importer (e.g., won't work across
spn importer "submitted" editgroups), but is better than nothing.
Diffstat (limited to 'python/fatcat_tools/importers/ingest.py')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 6 | 
1 files changed, 6 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bd5713c3..c47f0aa7 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -160,6 +160,12 @@ class IngestFileResultImporter(EntityImporter):
             self.counts['exists'] += 1
             return False
 
+        # check for existing edits-in-progress with same file hash
+        for other in self._entity_queue:
+            if other.sha1 == fe.sha1:
+                self.counts['skip-in-queue'] += 1
+                return False
+
         if not self.do_updates:
             self.counts['skip-update-disabled'] += 1
             return False
```
