diff options
-rw-r--r-- | python/sandcrawler/persist.py | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py index c24dec8..7cb4f8d 100644 --- a/python/sandcrawler/persist.py +++ b/python/sandcrawler/persist.py @@ -133,6 +133,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker): if ingest_type not in ('pdf', 'xml'): self.counts['skip-ingest-type'] += 1 return None + if raw['status'] in ("existing", ): + self.counts['skip-existing'] += 1 + return None result = { 'ingest_type': ingest_type, 'base_url': raw['request']['base_url'], |