From 1a743b4fccb688d5b395e05a8ad71405f24e9c51 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 3 Sep 2021 18:49:17 -0700 Subject: yet more PDF sha1 to skip --- python/sandcrawler/pdfextract.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'python') diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 77278eb..6cf0ff4 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -57,6 +57,7 @@ BAD_PDF_SHA1HEX = [ "43a8c0abf0386d3e3397cf5e22a884761dd63db7", "445968ef735b228c08c3ff4238d99fc9f4824619", "447fa6b5a90742a86429a932f6608d8e141688c0", + "45f014d7d631559dc7726e5c5513f1e7c91c48a9", "47577ff6d6876117ca69bec60a5764f7d2c2ec70", "4785181cec8944eee00ddb631a5dfc771b89bab7", "47db2db2cc976429568841a0496c0ab4ed7b5977", @@ -136,11 +137,15 @@ BAD_PDF_SHA1HEX = [ "e2c3b8a2cf33d5e8972bc9ddb78373766a75e412", "e64714a81f60ab9286ec90cad682cb22e564fb6f", "e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433", + "e9e84e17383e93a784a8471708619162b32fb399", "eac7df5f799983d5a7cc55d10b4d426dc557febf", + "eaf84b2efd2f69c7b3f407f89ea66ac4c41fac36", "eb1b39fd7a874896688855a22efddef10272427c", "eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2", "edf8dcc8736f06afbaca0e01d60bd2c475403a3d", + "ee2ee6ae2cf05128810d0d95bbe69bd263e140de", "ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7", + "ef1dfa325c21cff4cd8bb1a9b6c4ee6996d43c8f", "ef6749d9263a01f921ba7d72df0d17671d14e5f6", "f0ea221d8587cede25592266486e119d277f7096", "f68f9a9202a75d2aee35252e104d796f9515001e", -- cgit v1.2.3