diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-10 12:19:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-10 12:19:37 -0700 |
commit | 41fdef2a3768bec1cdbde21ab72fcbbe44c180c7 (patch) | |
tree | 68afe816cc031a2d650aba6ff233d93a686d9cc6 | |
parent | 9753876b85c767a9848467065b4d4dd613d5ed68 (diff) | |
download | sandcrawler-41fdef2a3768bec1cdbde21ab72fcbbe44c180c7.tar.gz sandcrawler-41fdef2a3768bec1cdbde21ab72fcbbe44c180c7.zip |
yet more bad sha1 PDFs to skip
-rw-r--r-- | python/sandcrawler/pdfextract.py | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index 5d5f6f1..58199c8 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -26,25 +26,36 @@ BAD_PDF_SHA1HEX = [ "09db7c9f2efb496c974427a61e84292ae27fc702", "0d1c1567ea70e7b922ba88ccb868ffc7ca18e75c", "10c6577a658bf6203557e2998b25ea9788f8adfe", + "17e679b0ec9444fff2ea4d02caec05dd2de80ec3", "182749ad1db1d5e999d07f010bdcfc2978dadc88", "20589d9dd0a22c8c938ad97b7f4f12648aa119fa", "25ab9e6169f041be05844a9b4edd6574918af769", "281de904c4642a9be4f17b9774fc0a2bdc8a90e3", "2fc64da736175810918fd32c94c5068b0d660bcc", "32318fba9b05b2756b7362bcaa4722c92ed8d449", + "336833c6fc968cd0938250dfc93c032a30111cfc", "373f84dfab4ed47047826e604e2918a9cd6a95b2", "3ac0b6e17e30d141871a0a5b127536919fe5aa19", + "3c8a6a708da0dc1802f5f3e5267a49b3c25e1ffe", + "3e5f9fb94e7314447a22f3d009419a922136177f", "436c9183724f051b22c96285aa8ff1d2ba709574", "445968ef735b228c08c3ff4238d99fc9f4824619", "447fa6b5a90742a86429a932f6608d8e141688c0", + "4785181cec8944eee00ddb631a5dfc771b89bab7", + "47db2db2cc976429568841a0496c0ab4ed7b5977", "4c81129904f7976a50825595a3497ea7b52579ef", "50b3c5a3122272aca69855ef06b85d0b43a76eb1", "58d9ae7dcb0a7dbbdfc58ad266030b037e9cd0ff", + "5ab98405b676ee81a6ca74fba51a9e4a6cff7311", "5e6a3adde9f08c276c4efd72bfacb256f2ec35d9", + "623ff84b616383d0a3e0dd8dbce12f0b5fe9a6ac", "646c4a654270606256397684204ff0f3d17be2e7", "64d821d728f9a3dc944b4c03be00feea0b57e314", "6909f0b62d8b7835de3dec7777aad7f8ef507ee3", "771f1ca0007a6fbed5b4a434c73f524f715d33c1", + "781dafda896a9f5c30f3d0a011f79a3b79b574c4", + "788672c7c2bcdecf6e2f6a2177c01e60f04d9cfb", + "7e9d846f3bf9ce15cdb991b78cc870ab8a2bed76", "88edcbab1cac2d70af5870422974afc253f4f0c6", "8e4f03c29ae1fe7227140ab4b625f375f6c00d31", "949dfb7d833da9576b2ccb9eb1ab5457469c53d3", @@ -55,15 +66,24 @@ BAD_PDF_SHA1HEX = [ "b2d719120306b90eb8dd3580b699a61ec70556f4", "b5be7f409a3a2601208c5ce08cf52b9ac1094aae", 
"b5bf8b7467fb095c90adf3b49aa1687291e4469c", + "b8b427e5b3d650ba9e03197f9c3917e25b878930", + "bad48b89b639b5b7df2c6a2d5288181fcb8b0e35", "c1b583fbd052572f08158d39ffe4d7510dadbebb", + "c7220d1bf1e71fb755d9f26bbdd4c539dc162960", + "c7687fa6f637c7d32a25be0e772867d87536d35c", + "c92b9ae9eefa07504950b405625aef54b48f0e1a", "ccb1debcfae006a3fc984e9e91309b9706a5c375", "cd8a7c3b8d850ebedc1ca791ccb37b9a2689f9c3", "d17b1e254cce82df5c6eb4fd492cef91e7e11558", "d188762a7e3ab5d4ee8a897204316513e4e636ec", "d6b0f405bf13c23d0e90c54eea527442786d1cd3", + "da2211ee2dbc6dda36571976d810e2366a3d2504", "e01bb7256d77aea258313bb410dfcfc10512f420", + "e2bf5d0a5885359381fe8ef2cd9290171d494e9b", + "e9d7716b4f94bbc3d94459b5fe9bb8b15cb2e433", "eb1b39fd7a874896688855a22efddef10272427c", "eb5fffaa590a52bcc3705b888c6ff9c4dc4c45b2", + "ee9530a2c5a3d1e3813ccb51a55cc8b0d9b5dfc7", "f68f9a9202a75d2aee35252e104d796f9515001e", ] |