diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-09-03 18:49:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-09-03 18:49:17 -0700 |
commit | 1a743b4fccb688d5b395e05a8ad71405f24e9c51 (patch) | |
tree | eee3d5c4a0a0bbbf45931e90d70bfdb836a82d52 | |
parent | 379276a55b14474a0babe829a41f10bf3a89bbe7 (diff) | |
download | sandcrawler-1a743b4fccb688d5b395e05a8ad71405f24e9c51.tar.gz sandcrawler-1a743b4fccb688d5b395e05a8ad71405f24e9c51.zip |
yet more PDF sha1 to skip
-rw-r--r-- | python/sandcrawler/pdfextract.py | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 77278eb..6cf0ff4 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -57,6 +57,7 @@ BAD_PDF_SHA1HEX = [
  "43a8c0abf0386d3e3397cf5e22a884761dd63db7",
  "445968ef735b228c08c3ff4238d99fc9f4824619",
  "447fa6b5a90742a86429a932f6608d8e141688c0",
+ "45f014d7d631559dc7726e5c5513f1e7c91c48a9",
  "47577ff6d6876117ca69bec60a5764f7d2c2ec70",
  "4785181cec8944eee00ddb631a5dfc771b89bab7",
  "47db2db2cc976429568841a0496c0ab4ed7b5977",
@@ -136,11 +137,15 @@ BAD_PDF_SHA1HEX = [
  "e2c3b8a2cf33d5e8972bc9ddb78373766a75e412",
  "e64714a81f60ab9286ec90cad682cb22e564fb6f",
  "e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433",
+ "e9e84e17383e93a784a8471708619162b32fb399",
  "eac7df5f799983d5a7cc55d10b4d426dc557febf",
+ "eaf84b2efd2f69c7b3f407f89ea66ac4c41fac36",
  "eb1b39fd7a874896688855a22efddef10272427c",
  "eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2",
  "edf8dcc8736f06afbaca0e01d60bd2c475403a3d",
+ "ee2ee6ae2cf05128810d0d95bbe69bd263e140de",
  "ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7",
+ "ef1dfa325c21cff4cd8bb1a9b6c4ee6996d43c8f",
  "ef6749d9263a01f921ba7d72df0d17671d14e5f6",
  "f0ea221d8587cede25592266486e119d277f7096",
  "f68f9a9202a75d2aee35252e104d796f9515001e",
```