aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/pdfextract.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-26 12:22:38 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-26 12:22:38 -0700
commit 3cdf4af9be4c762ff2ed79a57b5ad30637909f1e (patch)
tree b7e7e27ff2032c99fd782b3ea40daf1d12f9164e /python/sandcrawler/pdfextract.py
parent f67d870ba4ca9cecd0b75f106335997c813e9df4 (diff)
download sandcrawler-3cdf4af9be4c762ff2ed79a57b5ad30637909f1e.tar.gz
sandcrawler-3cdf4af9be4c762ff2ed79a57b5ad30637909f1e.zip
python: isort all imports
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- python/sandcrawler/pdfextract.py 11
1 files changed, 5 insertions, 6 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 9b4e834..2fb34b8 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -1,17 +1,16 @@
-import sys
-import json
import datetime
-from io import BytesIO
+import json
+import sys
from dataclasses import dataclass
-from typing import Optional, Dict, Any
+from io import BytesIO
+from typing import Any, Dict, Optional
import poppler
from PIL import Image
-from .workers import SandcrawlerWorker, SandcrawlerFetchWorker
from .misc import gen_file_metadata
-
+from .workers import SandcrawlerFetchWorker, SandcrawlerWorker
# This is a hack to work around timeouts when processing certain PDFs with
# poppler. For some reason, the usual Kafka timeout catcher isn't working on