From ab01652b9d4b5542a973f591031b54cdcfd4701f Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 14 Jan 2020 17:02:34 -0800 Subject: ingest persist skips 'existing' ingest results --- python/sandcrawler/persist.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'python') diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py index c24dec8..7cb4f8d 100644 --- a/python/sandcrawler/persist.py +++ b/python/sandcrawler/persist.py @@ -133,6 +133,9 @@ class PersistIngestFileResultWorker(SandcrawlerWorker): if ingest_type not in ('pdf', 'xml'): self.counts['skip-ingest-type'] += 1 return None + if raw['status'] in ("existing", ): + self.counts['skip-existing'] += 1 + return None result = { 'ingest_type': ingest_type, 'base_url': raw['request']['base_url'], -- cgit v1.2.3