diff options
-rw-r--r-- | python/sandcrawler/pdfextract.py | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 3adee3a..311bbf8 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -25,16 +25,20 @@ BAD_PDF_SHA1HEX = [ "09cba9b00494d12759c50cb914f1fb7c9746f5d1", "09db7c9f2efb496c974427a61e84292ae27fc702", "0a1c13cb8783bbbf248b2345b9890e2410aa3f0a", + "0ccc6dc94f4e2d809fac8543870265c3421f3c9e", "0d1c1567ea70e7b922ba88ccb868ffc7ca18e75c", "10c6577a658bf6203557e2998b25ea9788f8adfe", "15a720921ce30da983fcd1bfa7fe9aeeda503e41", "1659881a31edc2d0e170f6bb26d32e74cc4ca387", "17e679b0ec9444fff2ea4d02caec05dd2de80ec3", "182749ad1db1d5e999d07f010bdcfc2978dadc88", + "1a17a4fc43397804830cc29021281aac2e8cf0cb", "1cb166f0c0b5ffe673e6bbf6a29d77278711f253", "1d04e46b6848e6479dd90fe26bb11627044fb664", "1d967c95546d31edaaf0c3ef9ffcc11113a9e11a", + "1f90194bf0c7fff1fe1ed5fff77a934c7a1b32a0", "20589d9dd0a22c8c938ad97b7f4f12648aa119fa", + "2195e528fa1cf5f8ae3b2adcc516896016c3411f", "25ab9e6169f041be05844a9b4edd6574918af769", "281de904c4642a9be4f17b9774fc0a2bdc8a90e3", "2bd5322975653536550a039eb055174b2bf241b3", @@ -47,11 +51,16 @@ BAD_PDF_SHA1HEX = [ "3c8a6a708da0dc1802f5f3e5267a49b3c25e1ffe", "3e5f9fb94e7314447a22f3d009419a922136177f", "3fad493c940137ce703f2f570ebb504e360c6df3", + "40aa94602ab13e5a7d9df8c989fca4fa5c01239e", + "427479c94d7d0e512f898bc7ff0b6f210069f902", "436c9183724f051b22c96285aa8ff1d2ba709574", + "43a8c0abf0386d3e3397cf5e22a884761dd63db7", "445968ef735b228c08c3ff4238d99fc9f4824619", "447fa6b5a90742a86429a932f6608d8e141688c0", + "47577ff6d6876117ca69bec60a5764f7d2c2ec70", "4785181cec8944eee00ddb631a5dfc771b89bab7", "47db2db2cc976429568841a0496c0ab4ed7b5977", + "481c0bae81873988fcc8662ba8a269e8823fdea2", "4c81129904f7976a50825595a3497ea7b52579ef", "4edc1402712fa6827c4501fed8042e9f4447829c", "50b3c5a3122272aca69855ef06b85d0b43a76eb1", @@ -64,6 +73,7 @@ BAD_PDF_SHA1HEX = [ "623ff84b616383d0a3e0dd8dbce12f0b5fe9a6ac", "646c4a654270606256397684204ff0f3d17be2e7", "64d821d728f9a3dc944b4c03be00feea0b57e314", + "689b5cb3ddef213d612363a903f10d0358ea64d2", "6909f0b62d8b7835de3dec7777aad7f8ef507ee3", "74e617dc95555e8ca3aadd19d0c85b71cd77d1d9", "75c2662a96ccc48891228df7c85eb7d4da9dd621", @@ -74,6 +84,7 @@ BAD_PDF_SHA1HEX = [ "79d6cba3c6e577a0f3a3a9fe575680d38454938d", "7daf61526ec825151f384cc1db510ca5237d5d80", "7e9d846f3bf9ce15cdb991b78cc870ab8a2bed76", + "8398b211a5ec4da1195a4ba1bc29ca8c0ac40f67", "859d7ec532a0bf3b52b17c7f2d8ecc58410c0aad", "88edcbab1cac2d70af5870422974afc253f4f0c6", "89860fc475fcb2a2d86c4544df52ec8fd5e6533f", @@ -103,11 +114,16 @@ BAD_PDF_SHA1HEX = [ "bad48b89b639b5b7df2c6a2d5288181fcb8b0e35", "be0cda7642e9247b3ee41cd2017fa709aab4f344", "c1b583fbd052572f08158d39ffe4d7510dadbebb", + "c2526f75a013dc67b14ce1e2d0e4fc80bb93c6e1", + "c4abbb284f4acaca9e8ceb88f842901984e84d33", "c7220d1bf1e71fb755d9f26bbdd4c539dc162960", "c7687fa6f637c7d32a25be0e772867d87536d35c", + "c7d8b37ec99cf0d987e60667f05299f200e18a5d", "c92b9ae9eefa07504950b405625aef54b48f0e1a", "ccb1debcfae006a3fc984e9e91309b9706a5c375", + "cd611c765cbb0b3b7cb2fdc07d8f0b9cc93ec257", "cd8a7c3b8d850ebedc1ca791ccb37b9a2689f9c3", + "d055c054c330f99ec011e37186d2b429339758fd", "d17b1e254cce82df5c6eb4fd492cef91e7e11558", "d188762a7e3ab5d4ee8a897204316513e4e636ec", "d6b0f405bf13c23d0e90c54eea527442786d1cd3", @@ -115,14 +131,18 @@ BAD_PDF_SHA1HEX = [ "e01bb7256d77aea258313bb410dfcfc10512f420", "e2bf5d0a5885359381fe8ef2cd9290171d494e9b", "e2c3b8a2cf33d5e8972bc9ddb78373766a75e412", + "e64714a81f60ab9286ec90cad682cb22e564fb6f", "e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433", "eac7df5f799983d5a7cc55d10b4d426dc557febf", "eb1b39fd7a874896688855a22efddef10272427c", "eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2", + "edf8dcc8736f06afbaca0e01d60bd2c475403a3d", "ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7", + "ef6749d9263a01f921ba7d72df0d17671d14e5f6", "f0ea221d8587cede25592266486e119d277f7096", "f68f9a9202a75d2aee35252e104d796f9515001e", "f9314d3bf2eac78a7d78d18adcccdb35542054ef", + "fd9bd560662e070b222d63052830837829c490f0", ] @dataclass |