about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/ingest.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-12-13 19:06:24 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-12-13 19:06:26 -0800
commit 037a77d507dc32feb7ac2efd8a27cc2c36761bc2 (patch)
tree bc28ba8fff82e0d3d2de1e88a307c551781934ac /python/fatcat_tools/importers/ingest.py
parent 37c0b3354eb0077ec00cb3a1e5ed1760566649b7 (diff)
download fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.tar.gz
fatcat-037a77d507dc32feb7ac2efd8a27cc2c36761bc2.zip
add ingest import file collision protection
The common case is the same URL being submitted repeatedly during testing. This check only applies within a single editgroup, and per importer (e.g., it won't work across the spn importer's "submitted" editgroups), but it is better than nothing.
Diffstat (limited to 'python/fatcat_tools/importers/ingest.py')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bd5713c3..c47f0aa7 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -160,6 +160,12 @@ class IngestFileResultImporter(EntityImporter):
self.counts['exists'] += 1
return False
+ # check for existing edits-in-progress with same file hash
+ for other in self._entity_queue:
+ if other.sha1 == fe.sha1:
+ self.counts['skip-in-queue'] += 1
+ return False
+
if not self.do_updates:
self.counts['skip-update-disabled'] += 1
return False