From 200bf734bd459dd3c7a147b3dfe127dbf0ed7f70 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 21 Oct 2020 12:20:52 -0700 Subject: differentiate wayback-error from wayback-content-error The motivation here is to distinguish errors due to current content in wayback (eg, in WARCs) from operational errors (eg, wayback machine is down, or network failures/disruption). --- python/sandcrawler/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python/sandcrawler/__init__.py') diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py index 71c2023..e461462 100644 --- a/python/sandcrawler/__init__.py +++ b/python/sandcrawler/__init__.py @@ -3,7 +3,7 @@ from .grobid import GrobidClient, GrobidWorker, GrobidBlobWorker from .pdftrio import PdfTrioClient, PdfTrioWorker, PdfTrioBlobWorker from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime, clean_url from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper -from .ia import WaybackClient, WaybackError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow +from .ia import WaybackClient, WaybackError, WaybackContentError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow from .ingest import IngestFileWorker from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker, PersistPdfTextWorker, PersistThumbnailWorker from .db import SandcrawlerPostgrestClient, SandcrawlerPostgresClient -- cgit v1.2.3