From 037a77d507dc32feb7ac2efd8a27cc2c36761bc2 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 13 Dec 2019 19:06:24 -0800 Subject: add ingest import file collision protection The common case is the same URL being submitted repeatedly during testing. This check only applies within a single editgroup, and per importer (e.g., it won't work across spn importer "submitted" editgroups), but is better than nothing. --- python/fatcat_tools/importers/ingest.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index bd5713c3..c47f0aa7 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -160,6 +160,12 @@ class IngestFileResultImporter(EntityImporter): self.counts['exists'] += 1 return False + # check for existing edits-in-progress with same file hash + for other in self._entity_queue: + if other.sha1 == fe.sha1: + self.counts['skip-in-queue'] += 1 + return False + if not self.do_updates: self.counts['skip-update-disabled'] += 1 return False -- cgit v1.2.3