aboutsummaryrefslogtreecommitdiffstats
path: root/python/scripts
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-16 17:27:51 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-16 17:27:56 -0700
commit: d49ea4fb3f567351c63816e703348d8a9fd49ff0 (patch)
tree: 6f994cde1ae84d3e4ccde0dd67872ac9616bbd08 /python/scripts
parent: bb46cd951a4cbe8664f02cc69c7983429e3cce3e (diff)
download: sandcrawler-d49ea4fb3f567351c63816e703348d8a9fd49ff0.tar.gz
sandcrawler-d49ea4fb3f567351c63816e703348d8a9fd49ff0.zip
pdf_thumbnail script: demonstrate PDF thumbnail generation
Diffstat (limited to 'python/scripts')
-rwxr-xr-x python/scripts/pdf_thumbnail.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/python/scripts/pdf_thumbnail.py b/python/scripts/pdf_thumbnail.py
new file mode 100755
index 0000000..e093dc3
--- /dev/null
+++ b/python/scripts/pdf_thumbnail.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+"""
+Quick CLI script to convert the first page of a PDF to a thumbnail image (.png, .jpeg, etc.).
+
+Originally used to benchmark and compare file size/quality.
+"""
+
+import sys
+import poppler
+from PIL import Image
+
+
+def run(inpath, outpath):
+    """Render the first page of the PDF at inpath and save a thumbnail (at most 180x300) to outpath."""
+    try:
+        pdf = poppler.load_from_file(inpath)
+        page = pdf.create_page(0)
+    except Exception as e:
+        print(str(e), file=sys.stderr)
+        sys.exit(1)  # report the failure to the caller instead of exiting "successfully"
+    renderer = poppler.PageRenderer()
+    full_page = renderer.render_page(page)
+    # poppler's default ARGB32 output is B, G, R, A in memory on little-endian hosts, hence raw mode "BGRA"
+    img = Image.frombuffer("RGBA", (full_page.width, full_page.height), full_page.data, 'raw', "BGRA", 0, 1)
+    img.thumbnail((180, 300), Image.BICUBIC)  # shrinks in place, preserving aspect ratio
+    if outpath.lower().endswith((".jpg", ".jpeg")):
+        img = img.convert("RGB")  # JPEG has no alpha channel; Pillow refuses to write RGBA as JPEG
+    img.save(outpath)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:  # script name plus exactly two paths: source PDF, output image
+        print("expect two parameters: INPUT.pdf OUTPUT.png", file=sys.stderr)
+        sys.exit(-1)
+    run(sys.argv[1], sys.argv[2])