aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/__init__.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-10-21 12:20:52 -0700
committerBryan Newbold <bnewbold@archive.org>2020-10-21 12:20:54 -0700
commit200bf734bd459dd3c7a147b3dfe127dbf0ed7f70 (patch)
tree4f010e66a059271ac3b9c496d15a3bc90bd763c4 /python/sandcrawler/__init__.py
parent33249f2679851afb64142c428be45d16f35f5539 (diff)
downloadsandcrawler-200bf734bd459dd3c7a147b3dfe127dbf0ed7f70.tar.gz
sandcrawler-200bf734bd459dd3c7a147b3dfe127dbf0ed7f70.zip
differentiate wayback-error from wayback-content-error
The motivation here is to distinguish errors due to current content in wayback (e.g., in WARCs) from operational errors (e.g., the wayback machine is down, or network failures/disruption).
Diffstat (limited to 'python/sandcrawler/__init__.py')
-rw-r--r--python/sandcrawler/__init__.py2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py
index 71c2023..e461462 100644
--- a/python/sandcrawler/__init__.py
+++ b/python/sandcrawler/__init__.py
@@ -3,7 +3,7 @@ from .grobid import GrobidClient, GrobidWorker, GrobidBlobWorker
from .pdftrio import PdfTrioClient, PdfTrioWorker, PdfTrioBlobWorker
from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime, clean_url
from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper
-from .ia import WaybackClient, WaybackError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow
+from .ia import WaybackClient, WaybackError, WaybackContentError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow
from .ingest import IngestFileWorker
from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker, PersistPdfTextWorker, PersistThumbnailWorker
from .db import SandcrawlerPostgrestClient, SandcrawlerPostgresClient