From 3adcaf9802928346dda597cefd4b66b2e62fa942 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 3 Nov 2020 19:12:14 -0800 Subject: refactor 'minio' to 'seaweedfs'; and BLOB env vars This goes along with changes to ansible deployment to use the correct key names and values. --- python/sandcrawler/minio.py | 4 ++-- python/sandcrawler/persist.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'python/sandcrawler') diff --git a/python/sandcrawler/minio.py b/python/sandcrawler/minio.py index 8b02211..c7deea1 100644 --- a/python/sandcrawler/minio.py +++ b/python/sandcrawler/minio.py @@ -17,8 +17,8 @@ class SandcrawlerMinioClient(object): Example config: host="localhost:9000", - access_key=os.environ['MINIO_ACCESS_KEY'], - secret_key=os.environ['MINIO_SECRET_KEY'], + access_key=os.environ['SANDCRAWLER_BLOB_ACCESS_KEY'], + secret_key=os.environ['SANDCRAWLER_BLOB_SECRET_KEY'], """ self.mc = minio.Minio( host_url, diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py index fbc5273..aa05195 100644 --- a/python/sandcrawler/persist.py +++ b/python/sandcrawler/persist.py @@ -452,9 +452,11 @@ class PersistPdfTextWorker(SandcrawlerWorker): class PersistThumbnailWorker(SandcrawlerWorker): """ - Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL table. + Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL + table. - This worker *must* be used with raw kakfa mode. + This worker *must* be used with raw kafka mode; thumbnails are *not* + wrapped in JSON like most sandcrawler kafka messages. """ def __init__(self, **kwargs): -- cgit v1.2.3