aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-09-03 18:49:17 -0700
committerBryan Newbold <bnewbold@archive.org>2021-09-03 18:49:17 -0700
commit1a743b4fccb688d5b395e05a8ad71405f24e9c51 (patch)
treeeee3d5c4a0a0bbbf45931e90d70bfdb836a82d52
parent379276a55b14474a0babe829a41f10bf3a89bbe7 (diff)
downloadsandcrawler-1a743b4fccb688d5b395e05a8ad71405f24e9c51.tar.gz
sandcrawler-1a743b4fccb688d5b395e05a8ad71405f24e9c51.zip
yet more PDF sha1 to skip
-rw-r--r--python/sandcrawler/pdfextract.py5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 77278eb..6cf0ff4 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -57,6 +57,7 @@ BAD_PDF_SHA1HEX = [
"43a8c0abf0386d3e3397cf5e22a884761dd63db7",
"445968ef735b228c08c3ff4238d99fc9f4824619",
"447fa6b5a90742a86429a932f6608d8e141688c0",
+ "45f014d7d631559dc7726e5c5513f1e7c91c48a9",
"47577ff6d6876117ca69bec60a5764f7d2c2ec70",
"4785181cec8944eee00ddb631a5dfc771b89bab7",
"47db2db2cc976429568841a0496c0ab4ed7b5977",
@@ -136,11 +137,15 @@ BAD_PDF_SHA1HEX = [
"e2c3b8a2cf33d5e8972bc9ddb78373766a75e412",
"e64714a81f60ab9286ec90cad682cb22e564fb6f",
"e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433",
+ "e9e84e17383e93a784a8471708619162b32fb399",
"eac7df5f799983d5a7cc55d10b4d426dc557febf",
+ "eaf84b2efd2f69c7b3f407f89ea66ac4c41fac36",
"eb1b39fd7a874896688855a22efddef10272427c",
"eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2",
"edf8dcc8736f06afbaca0e01d60bd2c475403a3d",
+ "ee2ee6ae2cf05128810d0d95bbe69bd263e140de",
"ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7",
+ "ef1dfa325c21cff4cd8bb1a9b6c4ee6996d43c8f",
"ef6749d9263a01f921ba7d72df0d17671d14e5f6",
"f0ea221d8587cede25592266486e119d277f7096",
"f68f9a9202a75d2aee35252e104d796f9515001e",