diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-13 19:06:24 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-13 19:06:26 -0800 |
commit | 037a77d507dc32feb7ac2efd8a27cc2c36761bc2 (patch) | |
tree | bc28ba8fff82e0d3d2de1e88a307c551781934ac /python/fatcat_tools | |
parent | 37c0b3354eb0077ec00cb3a1e5ed1760566649b7 (diff) | |
download | fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.tar.gz fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.zip |
add ingest import file collision protection
The common case is the same URL being submitted repeatedly during
testing.
This check only applies within a single editgroup, and per importer (e.g., it
won't catch collisions across spn importer "submitted" editgroups), but it is better than nothing.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bd5713c3..c47f0aa7 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -160,6 +160,12 @@ class IngestFileResultImporter(EntityImporter):
             self.counts['exists'] += 1
             return False
 
+        # check for existing edits-in-progress with same file hash
+        for other in self._entity_queue:
+            if other.sha1 == fe.sha1:
+                self.counts['skip-in-queue'] += 1
+                return False
+
         if not self.do_updates:
             self.counts['skip-update-disabled'] += 1
             return False