about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/__init__.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-09-30 15:09:42 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:04 -0700
commit 86107e39b761e5b799562af662219fda04ade1be (patch)
tree 9a9801fc1d8a583a792457c2b8468ce6dbea22f1 /python/sandcrawler/__init__.py
parent 7430ddbcdec76091220de474060b968f0ef1bb70 (diff)
download sandcrawler-86107e39b761e5b799562af662219fda04ade1be.tar.gz
sandcrawler-86107e39b761e5b799562af662219fda04ade1be.zip
refactoring; progress on filesets
Diffstat (limited to 'python/sandcrawler/__init__.py')
-rw-r--r-- python/sandcrawler/__init__.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py
index e461462..724a39c 100644
--- a/python/sandcrawler/__init__.py
+++ b/python/sandcrawler/__init__.py
@@ -4,7 +4,8 @@ from .pdftrio import PdfTrioClient, PdfTrioWorker, PdfTrioBlobWorker
from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime, clean_url
from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper
from .ia import WaybackClient, WaybackError, WaybackContentError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow
-from .ingest import IngestFileWorker
+from .ingest_file import IngestFileWorker
+from .ingest_fileset import IngestFilesetWorker
from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker, PersistPdfTextWorker, PersistThumbnailWorker
from .db import SandcrawlerPostgrestClient, SandcrawlerPostgresClient
from .pdfextract import PdfExtractWorker, PdfExtractBlobWorker