diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 15:00:44 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 15:00:44 -0700 |
commit | f751f37a1328069b77ea8cf30c5de6ab00aca808 (patch) | |
tree | 42f4df01e24e72d9cbb349afdace325fa2324b11 /python | |
parent | d14715279f03fae6cf0fe1998d7845e3a245e86e (diff) | |
download | sandcrawler-f751f37a1328069b77ea8cf30c5de6ab00aca808.tar.gz sandcrawler-f751f37a1328069b77ea8cf30c5de6ab00aca808.zip |
pdfextract: fix pdf_extra key names
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/pdfextract.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 301754a..efb6cca 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -199,8 +199,8 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr
         meta_xml=pdf.metadata or None,
         pdf_info=pdf_info,
         pdf_extra=dict(
-            height=page0rect.height,
-            width=page0rect.width,
+            page0_height=page0rect.height,
+            page0_width=page0rect.width,
             page_count=pdf.pages,
             permanent_id=permanent_id,
             update_id=update_id,
```