diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-06 18:17:09 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-06 18:17:09 -0800 |
commit | 8f4a22d78acb6518c6546645557ad5f0d2253c66 (patch) | |
tree | 4f8a917fc3bf5ccb4a511a303a309374d9c72ea6 /python/sandcrawler/persist.py | |
parent | 583f11aa95b3af5897d29f143f99716a257e9357 (diff) | |
download | sandcrawler-8f4a22d78acb6518c6546645557ad5f0d2253c66.tar.gz sandcrawler-8f4a22d78acb6518c6546645557ad5f0d2253c66.zip |
html: refactors/tweaks from testing
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- | python/sandcrawler/persist.py | 1 |
1 files changed, 0 insertions, 1 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index fbd2bdb..f13b1f3 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -97,7 +97,6 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
                 self.counts['skip-request-fields'] += 1
                 return None
         if raw['ingest_type'] not in ('pdf', 'xml', 'html'):
-            print(raw['ingest_type'])
             self.counts['skip-ingest-type'] += 1
             return None
         request = {