aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/sandcrawler/pdfextract.py20
1 files changed, 20 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 3adee3a..311bbf8 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -25,16 +25,20 @@ BAD_PDF_SHA1HEX = [
"09cba9b00494d12759c50cb914f1fb7c9746f5d1",
"09db7c9f2efb496c974427a61e84292ae27fc702",
"0a1c13cb8783bbbf248b2345b9890e2410aa3f0a",
+ "0ccc6dc94f4e2d809fac8543870265c3421f3c9e",
"0d1c1567ea70e7b922ba88ccb868ffc7ca18e75c",
"10c6577a658bf6203557e2998b25ea9788f8adfe",
"15a720921ce30da983fcd1bfa7fe9aeeda503e41",
"1659881a31edc2d0e170f6bb26d32e74cc4ca387",
"17e679b0ec9444fff2ea4d02caec05dd2de80ec3",
"182749ad1db1d5e999d07f010bdcfc2978dadc88",
+ "1a17a4fc43397804830cc29021281aac2e8cf0cb",
"1cb166f0c0b5ffe673e6bbf6a29d77278711f253",
"1d04e46b6848e6479dd90fe26bb11627044fb664",
"1d967c95546d31edaaf0c3ef9ffcc11113a9e11a",
+ "1f90194bf0c7fff1fe1ed5fff77a934c7a1b32a0",
"20589d9dd0a22c8c938ad97b7f4f12648aa119fa",
+ "2195e528fa1cf5f8ae3b2adcc516896016c3411f",
"25ab9e6169f041be05844a9b4edd6574918af769",
"281de904c4642a9be4f17b9774fc0a2bdc8a90e3",
"2bd5322975653536550a039eb055174b2bf241b3",
@@ -47,11 +51,16 @@ BAD_PDF_SHA1HEX = [
"3c8a6a708da0dc1802f5f3e5267a49b3c25e1ffe",
"3e5f9fb94e7314447a22f3d009419a922136177f",
"3fad493c940137ce703f2f570ebb504e360c6df3",
+ "40aa94602ab13e5a7d9df8c989fca4fa5c01239e",
+ "427479c94d7d0e512f898bc7ff0b6f210069f902",
"436c9183724f051b22c96285aa8ff1d2ba709574",
+ "43a8c0abf0386d3e3397cf5e22a884761dd63db7",
"445968ef735b228c08c3ff4238d99fc9f4824619",
"447fa6b5a90742a86429a932f6608d8e141688c0",
+ "47577ff6d6876117ca69bec60a5764f7d2c2ec70",
"4785181cec8944eee00ddb631a5dfc771b89bab7",
"47db2db2cc976429568841a0496c0ab4ed7b5977",
+ "481c0bae81873988fcc8662ba8a269e8823fdea2",
"4c81129904f7976a50825595a3497ea7b52579ef",
"4edc1402712fa6827c4501fed8042e9f4447829c",
"50b3c5a3122272aca69855ef06b85d0b43a76eb1",
@@ -64,6 +73,7 @@ BAD_PDF_SHA1HEX = [
"623ff84b616383d0a3e0dd8dbce12f0b5fe9a6ac",
"646c4a654270606256397684204ff0f3d17be2e7",
"64d821d728f9a3dc944b4c03be00feea0b57e314",
+ "689b5cb3ddef213d612363a903f10d0358ea64d2",
"6909f0b62d8b7835de3dec7777aad7f8ef507ee3",
"74e617dc95555e8ca3aadd19d0c85b71cd77d1d9",
"75c2662a96ccc48891228df7c85eb7d4da9dd621",
@@ -74,6 +84,7 @@ BAD_PDF_SHA1HEX = [
"79d6cba3c6e577a0f3a3a9fe575680d38454938d",
"7daf61526ec825151f384cc1db510ca5237d5d80",
"7e9d846f3bf9ce15cdb991b78cc870ab8a2bed76",
+ "8398b211a5ec4da1195a4ba1bc29ca8c0ac40f67",
"859d7ec532a0bf3b52b17c7f2d8ecc58410c0aad",
"88edcbab1cac2d70af5870422974afc253f4f0c6",
"89860fc475fcb2a2d86c4544df52ec8fd5e6533f",
@@ -103,11 +114,16 @@ BAD_PDF_SHA1HEX = [
"bad48b89b639b5b7df2c6a2d5288181fcb8b0e35",
"be0cda7642e9247b3ee41cd2017fa709aab4f344",
"c1b583fbd052572f08158d39ffe4d7510dadbebb",
+ "c2526f75a013dc67b14ce1e2d0e4fc80bb93c6e1",
+ "c4abbb284f4acaca9e8ceb88f842901984e84d33",
"c7220d1bf1e71fb755d9f26bbdd4c539dc162960",
"c7687fa6f637c7d32a25be0e772867d87536d35c",
+ "c7d8b37ec99cf0d987e60667f05299f200e18a5d",
"c92b9ae9eefa07504950b405625aef54b48f0e1a",
"ccb1debcfae006a3fc984e9e91309b9706a5c375",
+ "cd611c765cbb0b3b7cb2fdc07d8f0b9cc93ec257",
"cd8a7c3b8d850ebedc1ca791ccb37b9a2689f9c3",
+ "d055c054c330f99ec011e37186d2b429339758fd",
"d17b1e254cce82df5c6eb4fd492cef91e7e11558",
"d188762a7e3ab5d4ee8a897204316513e4e636ec",
"d6b0f405bf13c23d0e90c54eea527442786d1cd3",
@@ -115,14 +131,18 @@ BAD_PDF_SHA1HEX = [
"e01bb7256d77aea258313bb410dfcfc10512f420",
"e2bf5d0a5885359381fe8ef2cd9290171d494e9b",
"e2c3b8a2cf33d5e8972bc9ddb78373766a75e412",
+ "e64714a81f60ab9286ec90cad682cb22e564fb6f",
"e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433",
"eac7df5f799983d5a7cc55d10b4d426dc557febf",
"eb1b39fd7a874896688855a22efddef10272427c",
"eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2",
+ "edf8dcc8736f06afbaca0e01d60bd2c475403a3d",
"ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7",
+ "ef6749d9263a01f921ba7d72df0d17671d14e5f6",
"f0ea221d8587cede25592266486e119d277f7096",
"f68f9a9202a75d2aee35252e104d796f9515001e",
"f9314d3bf2eac78a7d78d18adcccdb35542054ef",
+ "fd9bd560662e070b222d63052830837829c490f0",
]
@dataclass