about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/__init__.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:12:59 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:12:59 -0700
commit: d2fb570038ced65e6890e689e900a0f1aaed917c (patch)
tree: 706a7f6107ae30c21d783773fa5d418f18d2aac6 /python/sandcrawler/__init__.py
parent: 82c7ec45dfbaa83e3b29b968846016cc6ae8e87f (diff)
download: sandcrawler-d2fb570038ced65e6890e689e900a0f1aaed917c.tar.gz
download: sandcrawler-d2fb570038ced65e6890e689e900a0f1aaed917c.zip
add new pdf workers/persisters
Diffstat (limited to 'python/sandcrawler/__init__.py')
-rw-r--r-- python/sandcrawler/__init__.py | 4
1 file changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py
index 2e5efd7..a01d1f8 100644
--- a/python/sandcrawler/__init__.py
+++ b/python/sandcrawler/__init__.py
@@ -5,6 +5,6 @@ from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime
from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper
from .ia import WaybackClient, WaybackError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow
from .ingest import IngestFileWorker
-from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker
+from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker, PersistPdfTextWorker, PersistThumbnailWorker
from .db import SandcrawlerPostgrestClient, SandcrawlerPostgresClient
-from .pdf import PdfExtractWorker
+from .pdf import PdfExtractWorker, PdfExtractBlobWorker