aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-11-06 18:36:44 -0800
committerBryan Newbold <bnewbold@archive.org>2020-11-06 18:36:44 -0800
commit116df020d7d1cdadca1c56dcde25ff3939aa73e3 (patch)
tree8309981c4b37063c58a1706b4c03918e7a4f391b /python
parent175019c96fced3e21d0f60ea1a4a37da6b8872ac (diff)
downloadsandcrawler-116df020d7d1cdadca1c56dcde25ff3939aa73e3.tar.gz
sandcrawler-116df020d7d1cdadca1c56dcde25ff3939aa73e3.zip
many bad PDF sha1 from prod
Diffstat (limited to 'python')
-rw-r--r--python/sandcrawler/pdfextract.py36
1 files changed, 36 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 70d2f93..3adee3a 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -20,55 +20,88 @@ BAD_PDF_SHA1HEX = [
"011478a1e63a2a31eae1a93832a74cc95f220760",
"018dfe9824de6d2ac068ce0f7dc9961bffa1b558",
"057c7a9dfb611bfd52f7de6c39b2d5757c5e4e53",
+ "06061af0707298c12932516d1bb7c2b6dc443824",
"0641822e68c5a07538b967489fd19a1d5dc371a5",
"09cba9b00494d12759c50cb914f1fb7c9746f5d1",
"09db7c9f2efb496c974427a61e84292ae27fc702",
+ "0a1c13cb8783bbbf248b2345b9890e2410aa3f0a",
"0d1c1567ea70e7b922ba88ccb868ffc7ca18e75c",
"10c6577a658bf6203557e2998b25ea9788f8adfe",
+ "15a720921ce30da983fcd1bfa7fe9aeeda503e41",
+ "1659881a31edc2d0e170f6bb26d32e74cc4ca387",
"17e679b0ec9444fff2ea4d02caec05dd2de80ec3",
"182749ad1db1d5e999d07f010bdcfc2978dadc88",
+ "1cb166f0c0b5ffe673e6bbf6a29d77278711f253",
+ "1d04e46b6848e6479dd90fe26bb11627044fb664",
+ "1d967c95546d31edaaf0c3ef9ffcc11113a9e11a",
"20589d9dd0a22c8c938ad97b7f4f12648aa119fa",
"25ab9e6169f041be05844a9b4edd6574918af769",
"281de904c4642a9be4f17b9774fc0a2bdc8a90e3",
+ "2bd5322975653536550a039eb055174b2bf241b3",
"2fc64da736175810918fd32c94c5068b0d660bcc",
"32318fba9b05b2756b7362bcaa4722c92ed8d449",
"336833c6fc968cd0938250dfc93c032a30111cfc",
+ "362ad00bc24d650c8f11851f9e554fc560b73e7a",
"373f84dfab4ed47047826e604e2918a9cd6a95b2",
"3ac0b6e17e30d141871a0a5b127536919fe5aa19",
"3c8a6a708da0dc1802f5f3e5267a49b3c25e1ffe",
"3e5f9fb94e7314447a22f3d009419a922136177f",
+ "3fad493c940137ce703f2f570ebb504e360c6df3",
"436c9183724f051b22c96285aa8ff1d2ba709574",
"445968ef735b228c08c3ff4238d99fc9f4824619",
"447fa6b5a90742a86429a932f6608d8e141688c0",
"4785181cec8944eee00ddb631a5dfc771b89bab7",
"47db2db2cc976429568841a0496c0ab4ed7b5977",
"4c81129904f7976a50825595a3497ea7b52579ef",
+ "4edc1402712fa6827c4501fed8042e9f4447829c",
"50b3c5a3122272aca69855ef06b85d0b43a76eb1",
+ "52fc9b3c5199ef395d410c7cee5961dc812e4d29",
"58d9ae7dcb0a7dbbdfc58ad266030b037e9cd0ff",
+ "59cfc843ebdb1c1e5db1efc76a40f46cb3bb06f0",
"5ab98405b676ee81a6ca74fba51a9e4a6cff7311",
+ "5e04779cbbae5ce88bb786064f756885dd6895fe",
"5e6a3adde9f08c276c4efd72bfacb256f2ec35d9",
"623ff84b616383d0a3e0dd8dbce12f0b5fe9a6ac",
"646c4a654270606256397684204ff0f3d17be2e7",
"64d821d728f9a3dc944b4c03be00feea0b57e314",
"6909f0b62d8b7835de3dec7777aad7f8ef507ee3",
+ "74e617dc95555e8ca3aadd19d0c85b71cd77d1d9",
+ "75c2662a96ccc48891228df7c85eb7d4da9dd621",
"771f1ca0007a6fbed5b4a434c73f524f715d33c1",
"776859635e9dc01d97b0582f49c814ffbcb019fb",
"781dafda896a9f5c30f3d0a011f79a3b79b574c4",
"788672c7c2bcdecf6e2f6a2177c01e60f04d9cfb",
+ "79d6cba3c6e577a0f3a3a9fe575680d38454938d",
+ "7daf61526ec825151f384cc1db510ca5237d5d80",
"7e9d846f3bf9ce15cdb991b78cc870ab8a2bed76",
+ "859d7ec532a0bf3b52b17c7f2d8ecc58410c0aad",
"88edcbab1cac2d70af5870422974afc253f4f0c6",
+ "89860fc475fcb2a2d86c4544df52ec8fd5e6533f",
+ "8dcaf4ef132900dd378f7be526c884b17452713b",
"8e4f03c29ae1fe7227140ab4b625f375f6c00d31",
"949dfb7d833da9576b2ccb9eb1ab5457469c53d3",
+ "961ec451172f373f919c593737466300e42062cb",
+ "976989fa6e447578d9ce16ec5b526f0e09d6df50",
+ "98b02eb70066c182c705ef4d14d8b723ad7f1fab",
+ "993ca31f6974f8387bb18dd7d38987d290da8781",
"9dbd05af3442e6f42d67868054751b76973f4171",
"a2298c137b9c8c8975bad62eea9224edb95e6952",
"a2671738755ab8b24775e95375dc72f1ca4e5fd6",
+ "a26f299fb97c646effeebd4c5e2968786bd0f781",
+ "a48f9b7ad627909f76d780aa4208530304ece42c",
+ "a69665d0b5d3b95f54f68406eee3ed50c67efb45",
+ "a69665d0b5d3b95f54f68406eee3ed50c67efb45",
+ "a8357c31837404f9ebd798999d546c9398ab3648",
"a9162b9aef5e5da0897275fede1a6cff8cc93dfc",
+ "ad038725bf6855a79f3c768ebe93c7103d14522f",
"b2b66b9c7f817a20144456f99c0be805602e8597",
"b2d719120306b90eb8dd3580b699a61ec70556f4",
+ "b4b8e18e27f102e59b2be2d58c7b54d0a0eb457a",
"b5be7f409a3a2601208c5ce08cf52b9ac1094aae",
"b5bf8b7467fb095c90adf3b49aa1687291e4469c",
"b8b427e5b3d650ba9e03197f9c3917e25b878930",
"bad48b89b639b5b7df2c6a2d5288181fcb8b0e35",
+ "be0cda7642e9247b3ee41cd2017fa709aab4f344",
"c1b583fbd052572f08158d39ffe4d7510dadbebb",
"c7220d1bf1e71fb755d9f26bbdd4c539dc162960",
"c7687fa6f637c7d32a25be0e772867d87536d35c",
@@ -81,10 +114,13 @@ BAD_PDF_SHA1HEX = [
"da2211ee2dbc6dda36571976d810e2366a3d2504",
"e01bb7256d77aea258313bb410dfcfc10512f420",
"e2bf5d0a5885359381fe8ef2cd9290171d494e9b",
+ "e2c3b8a2cf33d5e8972bc9ddb78373766a75e412",
"e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433",
+ "eac7df5f799983d5a7cc55d10b4d426dc557febf",
"eb1b39fd7a874896688855a22efddef10272427c",
"eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2",
"ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7",
+ "f0ea221d8587cede25592266486e119d277f7096",
"f68f9a9202a75d2aee35252e104d796f9515001e",
"f9314d3bf2eac78a7d78d18adcccdb35542054ef",
]