diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 16:02:30 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 16:02:30 -0700 |
commit | 155a4c9918cf052bed7b73165a7a9ed965e69c6e (patch) | |
tree | cb1c3079aa6a54c6d88218a81e8c23887f23678a /python/sandcrawler/pdfextract.py | |
parent | 445532ac28519508713306e7ad4ab8524333a367 (diff) | |
download | sandcrawler-155a4c9918cf052bed7b73165a7a9ed965e69c6e.tar.gz sandcrawler-155a4c9918cf052bed7b73165a7a9ed965e69c6e.zip |
pdfextract_tool fixes from prod usage
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- | python/sandcrawler/pdfextract.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index efb6cca..4606632 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -259,5 +259,5 @@ class PdfExtractBlobWorker(SandcrawlerWorker):
         if self.thumbnail_sink and result.page0_thumbnail is not None:
             self.thumbnail_sink.push_record(result.page0_thumbnail, key=result.sha1hex)
 
-        return result
+        return result.to_pdftext_dict()
 