diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:22:38 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:22:38 -0700 |
commit | 3cdf4af9be4c762ff2ed79a57b5ad30637909f1e (patch) | |
tree | b7e7e27ff2032c99fd782b3ea40daf1d12f9164e /python/sandcrawler/pdfextract.py | |
parent | f67d870ba4ca9cecd0b75f106335997c813e9df4 (diff) | |
download | sandcrawler-3cdf4af9be4c762ff2ed79a57b5ad30637909f1e.tar.gz sandcrawler-3cdf4af9be4c762ff2ed79a57b5ad30637909f1e.zip |
python: isort all imports
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- | python/sandcrawler/pdfextract.py | 11 |
1 files changed, 5 insertions, 6 deletions
```diff
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 9b4e834..2fb34b8 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -1,17 +1,16 @@
 
-import sys
-import json
 import datetime
-from io import BytesIO
+import json
+import sys
 from dataclasses import dataclass
-from typing import Optional, Dict, Any
+from io import BytesIO
+from typing import Any, Dict, Optional
 
 import poppler
 from PIL import Image
 
-from .workers import SandcrawlerWorker, SandcrawlerFetchWorker
 from .misc import gen_file_metadata
-
+from .workers import SandcrawlerFetchWorker, SandcrawlerWorker
 
 # This is a hack to work around timeouts when processing certain PDFs with
 # poppler. For some reason, the usual Kafka timeout catcher isn't working on
```