diff options
| -rw-r--r-- | python/sandcrawler/pdfextract.py | 20 | 
1 files changed, 20 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 3adee3a..311bbf8 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -25,16 +25,20 @@ BAD_PDF_SHA1HEX = [      "09cba9b00494d12759c50cb914f1fb7c9746f5d1",      "09db7c9f2efb496c974427a61e84292ae27fc702",      "0a1c13cb8783bbbf248b2345b9890e2410aa3f0a", +    "0ccc6dc94f4e2d809fac8543870265c3421f3c9e",      "0d1c1567ea70e7b922ba88ccb868ffc7ca18e75c",      "10c6577a658bf6203557e2998b25ea9788f8adfe",      "15a720921ce30da983fcd1bfa7fe9aeeda503e41",      "1659881a31edc2d0e170f6bb26d32e74cc4ca387",      "17e679b0ec9444fff2ea4d02caec05dd2de80ec3",      "182749ad1db1d5e999d07f010bdcfc2978dadc88", +    "1a17a4fc43397804830cc29021281aac2e8cf0cb",      "1cb166f0c0b5ffe673e6bbf6a29d77278711f253",      "1d04e46b6848e6479dd90fe26bb11627044fb664",      "1d967c95546d31edaaf0c3ef9ffcc11113a9e11a", +    "1f90194bf0c7fff1fe1ed5fff77a934c7a1b32a0",      "20589d9dd0a22c8c938ad97b7f4f12648aa119fa", +    "2195e528fa1cf5f8ae3b2adcc516896016c3411f",      "25ab9e6169f041be05844a9b4edd6574918af769",      "281de904c4642a9be4f17b9774fc0a2bdc8a90e3",      "2bd5322975653536550a039eb055174b2bf241b3", @@ -47,11 +51,16 @@ BAD_PDF_SHA1HEX = [      "3c8a6a708da0dc1802f5f3e5267a49b3c25e1ffe",      "3e5f9fb94e7314447a22f3d009419a922136177f",      "3fad493c940137ce703f2f570ebb504e360c6df3", +    "40aa94602ab13e5a7d9df8c989fca4fa5c01239e", +    "427479c94d7d0e512f898bc7ff0b6f210069f902",      "436c9183724f051b22c96285aa8ff1d2ba709574", +    "43a8c0abf0386d3e3397cf5e22a884761dd63db7",      "445968ef735b228c08c3ff4238d99fc9f4824619",      "447fa6b5a90742a86429a932f6608d8e141688c0", +    "47577ff6d6876117ca69bec60a5764f7d2c2ec70",      "4785181cec8944eee00ddb631a5dfc771b89bab7",      "47db2db2cc976429568841a0496c0ab4ed7b5977", +    "481c0bae81873988fcc8662ba8a269e8823fdea2",      "4c81129904f7976a50825595a3497ea7b52579ef",      "4edc1402712fa6827c4501fed8042e9f4447829c",      "50b3c5a3122272aca69855ef06b85d0b43a76eb1", @@ -64,6 +73,7 @@ BAD_PDF_SHA1HEX = [      "623ff84b616383d0a3e0dd8dbce12f0b5fe9a6ac",      "646c4a654270606256397684204ff0f3d17be2e7",      "64d821d728f9a3dc944b4c03be00feea0b57e314", +    "689b5cb3ddef213d612363a903f10d0358ea64d2",      "6909f0b62d8b7835de3dec7777aad7f8ef507ee3",      "74e617dc95555e8ca3aadd19d0c85b71cd77d1d9",      "75c2662a96ccc48891228df7c85eb7d4da9dd621", @@ -74,6 +84,7 @@ BAD_PDF_SHA1HEX = [      "79d6cba3c6e577a0f3a3a9fe575680d38454938d",      "7daf61526ec825151f384cc1db510ca5237d5d80",      "7e9d846f3bf9ce15cdb991b78cc870ab8a2bed76", +    "8398b211a5ec4da1195a4ba1bc29ca8c0ac40f67",      "859d7ec532a0bf3b52b17c7f2d8ecc58410c0aad",      "88edcbab1cac2d70af5870422974afc253f4f0c6",      "89860fc475fcb2a2d86c4544df52ec8fd5e6533f", @@ -103,11 +114,16 @@ BAD_PDF_SHA1HEX = [      "bad48b89b639b5b7df2c6a2d5288181fcb8b0e35",      "be0cda7642e9247b3ee41cd2017fa709aab4f344",      "c1b583fbd052572f08158d39ffe4d7510dadbebb", +    "c2526f75a013dc67b14ce1e2d0e4fc80bb93c6e1", +    "c4abbb284f4acaca9e8ceb88f842901984e84d33",      "c7220d1bf1e71fb755d9f26bbdd4c539dc162960",      "c7687fa6f637c7d32a25be0e772867d87536d35c", +    "c7d8b37ec99cf0d987e60667f05299f200e18a5d",      "c92b9ae9eefa07504950b405625aef54b48f0e1a",      "ccb1debcfae006a3fc984e9e91309b9706a5c375", +    "cd611c765cbb0b3b7cb2fdc07d8f0b9cc93ec257",      "cd8a7c3b8d850ebedc1ca791ccb37b9a2689f9c3", +    "d055c054c330f99ec011e37186d2b429339758fd",      "d17b1e254cce82df5c6eb4fd492cef91e7e11558",      "d188762a7e3ab5d4ee8a897204316513e4e636ec",      "d6b0f405bf13c23d0e90c54eea527442786d1cd3", @@ -115,14 +131,18 @@ BAD_PDF_SHA1HEX = [      "e01bb7256d77aea258313bb410dfcfc10512f420",      "e2bf5d0a5885359381fe8ef2cd9290171d494e9b",      "e2c3b8a2cf33d5e8972bc9ddb78373766a75e412", +    "e64714a81f60ab9286ec90cad682cb22e564fb6f",      "e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433",      "eac7df5f799983d5a7cc55d10b4d426dc557febf",      "eb1b39fd7a874896688855a22efddef10272427c",      "eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2", +    "edf8dcc8736f06afbaca0e01d60bd2c475403a3d",      "ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7", +    "ef6749d9263a01f921ba7d72df0d17671d14e5f6",      "f0ea221d8587cede25592266486e119d277f7096",      "f68f9a9202a75d2aee35252e104d796f9515001e",      "f9314d3bf2eac78a7d78d18adcccdb35542054ef", +    "fd9bd560662e070b222d63052830837829c490f0",  ]  @dataclass  | 
