From 9df71395046d045d7f8b568a55de4ea000de8791 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 16 Jun 2020 17:10:44 -0700 Subject: rename KafkaGrobidSink -> KafkaCompressSink --- python/sandcrawler/__init__.py | 2 +- python/sandcrawler/workers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'python/sandcrawler') diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py index 492b558..654df35 100644 --- a/python/sandcrawler/__init__.py +++ b/python/sandcrawler/__init__.py @@ -2,7 +2,7 @@ from .grobid import GrobidClient, GrobidWorker, GrobidBlobWorker from .pdftrio import PdfTrioClient, PdfTrioWorker, PdfTrioBlobWorker from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime, clean_url -from .workers import KafkaSink, KafkaGrobidSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper +from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper from .ia import WaybackClient, WaybackError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow from .ingest import IngestFileWorker from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py index 6425e99..a42b1a4 100644 --- a/python/sandcrawler/workers.py +++ b/python/sandcrawler/workers.py @@ -217,7 +217,7 @@ class KafkaSink(SandcrawlerWorker): return self.counts -class KafkaGrobidSink(KafkaSink): +class KafkaCompressSink(KafkaSink): """ Variant of KafkaSink for large documents. Used for, eg, GROBID output. """ -- cgit v1.2.3