about summary refs log tree commit diff stats
path: root/python/sandcrawler/pdfextract.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-25 16:02:30 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-25 16:02:30 -0700
commit 155a4c9918cf052bed7b73165a7a9ed965e69c6e (patch)
tree cb1c3079aa6a54c6d88218a81e8c23887f23678a /python/sandcrawler/pdfextract.py
parent 445532ac28519508713306e7ad4ab8524333a367 (diff)
download sandcrawler-155a4c9918cf052bed7b73165a7a9ed965e69c6e.tar.gz
sandcrawler-155a4c9918cf052bed7b73165a7a9ed965e69c6e.zip
pdfextract_tool fixes from prod usage
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- python/sandcrawler/pdfextract.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index efb6cca..4606632 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -259,5 +259,5 @@ class PdfExtractBlobWorker(SandcrawlerWorker):
if self.thumbnail_sink and result.page0_thumbnail is not None:
self.thumbnail_sink.push_record(result.page0_thumbnail, key=result.sha1hex)
- return result
+ return result.to_pdftext_dict()