diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 11:19:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 11:19:58 -0700 |
commit | 2a7ef915ad83dbcd2b00fa211f210a80cd561f27 (patch) | |
tree | e2556012b6aa24f2c19c48740c116e557bcd4a0a | |
parent | 8083c3e6708a1f702b2325a9587d7197345e8d0e (diff) | |
download | sandcrawler-2a7ef915ad83dbcd2b00fa211f210a80cd561f27.tar.gz sandcrawler-2a7ef915ad83dbcd2b00fa211f210a80cd561f27.zip |
pdfextract: don't compress thumbnail output
-rwxr-xr-x | python/pdfextract_tool.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/pdfextract_tool.py b/python/pdfextract_tool.py
index ce96d38..e05d48d 100755
--- a/python/pdfextract_tool.py
+++ b/python/pdfextract_tool.py
@@ -122,7 +122,7 @@ def main():
         thumbnail_topic = "sandcrawler-{}.pdf-thumbnail-180px-jpg".format(args.kafka_env)
         args.sink = KafkaCompressSink(kafka_hosts=args.kafka_hosts,
             produce_topic=text_topic)
-        args.thumbnail_sink = KafkaCompressSink(kafka_hosts=args.kafka_hosts,
+        args.thumbnail_sink = KafkaSink(kafka_hosts=args.kafka_hosts,
             produce_topic=thumbnail_topic)
         print("Running in kafka output mode, publishing to {} and {}\n".format(
             text_topic, thumbnail_topic), file=sys.stderr)