aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-12-22 12:26:41 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-12-22 12:26:44 -0800
commitfc6fa5a2d7f24c76d51f9ce2530fed055b20e27f (patch)
tree6fc6913cfc3504ff18259f7ee1e7b1eba15d02c6 /python/fatcat_tools
parentea5b2c1d7096bd00d9e2b6aafaa46a3e6e0cd969 (diff)
downloadfatcat-fc6fa5a2d7f24c76d51f9ce2530fed055b20e27f.tar.gz
fatcat-fc6fa5a2d7f24c76d51f9ce2530fed055b20e27f.zip
fix spn/ingest importer duplication check
The check was happening after the `return True` by mistake, allowing duplicates in SPN editgroups, and potentially in ingest request editgroups as well.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/importers/ingest.py14
1 files changed, 8 insertions, 6 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index c47f0aa7..ca741eb2 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -152,20 +152,22 @@ class IngestFileResultImporter(EntityImporter):
if err.status != 404:
raise err
+ # check for existing edits-in-progress with same file hash
+ for other in self._entity_queue:
+ if other.sha1 == fe.sha1:
+ self.counts['skip-in-queue'] += 1
+ return False
+
if not existing:
return True
+ # the following checks all assume there is an existing item
+
if (fe.release_ids[0] in existing.release_ids) and existing.urls:
# TODO: could still, in theory update with the new URL?
self.counts['exists'] += 1
return False
- # check for existing edits-in-progress with same file hash
- for other in self._entity_queue:
- if other.sha1 == fe.sha1:
- self.counts['skip-in-queue'] += 1
- return False
-
if not self.do_updates:
self.counts['skip-update-disabled'] += 1
return False