From 4f9ac8aae155a74439f03aa2ab9aa95e1d813092 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 2 Jul 2020 10:55:44 -0700
Subject: add another bad PDF sha1hex

---
 python/sandcrawler/pdfextract.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 350c591..4fd8712 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -19,6 +19,7 @@ from .ia import WaybackClient, WaybackError, PetaboxError
 # these, maybe due to threading.
 BAD_PDF_SHA1HEX = [
     "373f84dfab4ed47047826e604e2918a9cd6a95b2",
+    "64d821d728f9a3dc944b4c03be00feea0b57e314",
     "88edcbab1cac2d70af5870422974afc253f4f0c6",
     "8e4f03c29ae1fe7227140ab4b625f375f6c00d31",
 ]
-- 
cgit v1.2.3