about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/persist.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index c24dec8..7cb4f8d 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -133,6 +133,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
if ingest_type not in ('pdf', 'xml'):
self.counts['skip-ingest-type'] += 1
return None
+ if raw['status'] in ("existing", ):
+ self.counts['skip-existing'] += 1
+ return None
result = {
'ingest_type': ingest_type,
'base_url': raw['request']['base_url'],