about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-25 15:00:44 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-25 15:00:44 -0700
commit f751f37a1328069b77ea8cf30c5de6ab00aca808 (patch)
tree 42f4df01e24e72d9cbb349afdace325fa2324b11
parent d14715279f03fae6cf0fe1998d7845e3a245e86e (diff)
download sandcrawler-f751f37a1328069b77ea8cf30c5de6ab00aca808.tar.gz
sandcrawler-f751f37a1328069b77ea8cf30c5de6ab00aca808.zip
pdfextract: fix pdf_extra key names
-rw-r--r-- python/sandcrawler/pdfextract.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 301754a..efb6cca 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -199,8 +199,8 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr
meta_xml=pdf.metadata or None,
pdf_info=pdf_info,
pdf_extra=dict(
- height=page0rect.height,
- width=page0rect.width,
+ page0_height=page0rect.height,
+ page0_width=page0rect.width,
page_count=pdf.pages,
permanent_id=permanent_id,
update_id=update_id,