about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rwxr-xr-x python/grobid_tool.py 2
-rw-r--r-- python/sandcrawler/__init__.py 2
-rw-r--r-- python/sandcrawler/workers.py 2
3 files changed, 3 insertions, 3 deletions
diff --git a/python/grobid_tool.py b/python/grobid_tool.py
index a2d74a1..fe507a0 100755
--- a/python/grobid_tool.py
+++ b/python/grobid_tool.py
@@ -140,7 +140,7 @@ def main():
if args.kafka_mode:
produce_topic = "sandcrawler-{}.grobid-output-pg".format(args.kafka_env)
print("Running in kafka output mode, publishing to {}\n".format(produce_topic))
- args.sink = KafkaGrobidSink(kafka_hosts=args.kafka_hosts,
+ args.sink = KafkaCompressSink(kafka_hosts=args.kafka_hosts,
produce_topic=produce_topic)
args.func(args)
diff --git a/python/sandcrawler/__init__.py b/python/sandcrawler/__init__.py
index 492b558..654df35 100644
--- a/python/sandcrawler/__init__.py
+++ b/python/sandcrawler/__init__.py
@@ -2,7 +2,7 @@
from .grobid import GrobidClient, GrobidWorker, GrobidBlobWorker
from .pdftrio import PdfTrioClient, PdfTrioWorker, PdfTrioBlobWorker
from .misc import gen_file_metadata, b32_hex, parse_cdx_line, parse_cdx_datetime, clean_url
-from .workers import KafkaSink, KafkaGrobidSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper
+from .workers import KafkaSink, KafkaCompressSink, JsonLinePusher, CdxLinePusher, CdxLinePusher, KafkaJsonPusher, BlackholeSink, ZipfilePusher, MultiprocessWrapper
from .ia import WaybackClient, WaybackError, CdxApiClient, CdxApiError, SavePageNowClient, SavePageNowError, PetaboxError, ResourceResult, WarcResource, CdxPartial, CdxRow
from .ingest import IngestFileWorker
from .persist import PersistCdxWorker, PersistIngestFileResultWorker, PersistGrobidWorker, PersistGrobidDiskWorker, PersistPdfTrioWorker, PersistIngestRequestWorker
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 6425e99..a42b1a4 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -217,7 +217,7 @@ class KafkaSink(SandcrawlerWorker):
return self.counts
-class KafkaGrobidSink(KafkaSink):
+class KafkaCompressSink(KafkaSink):
"""
Variant of KafkaSink for large documents. Used for, eg, GROBID output.
"""