From 155a4c9918cf052bed7b73165a7a9ed965e69c6e Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 25 Jun 2020 16:02:30 -0700
Subject: pdfextract_tool fixes from prod usage

---
 python/sandcrawler/pdfextract.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'python/sandcrawler/pdfextract.py')

diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index efb6cca..4606632 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -259,5 +259,5 @@ class PdfExtractBlobWorker(SandcrawlerWorker):
         if self.thumbnail_sink and result.page0_thumbnail is not None:
             self.thumbnail_sink.push_record(result.page0_thumbnail, key=result.sha1hex)
 
-        return result
+        return result.to_pdftext_dict()
 
-- 
cgit v1.2.3