diff options
Diffstat (limited to 'python/sandcrawler')
-rw-r--r-- | python/sandcrawler/minio.py | 4 | ||||
-rw-r--r-- | python/sandcrawler/persist.py | 6 |
2 files changed, 6 insertions, 4 deletions
diff --git a/python/sandcrawler/minio.py b/python/sandcrawler/minio.py index 8b02211..c7deea1 100644 --- a/python/sandcrawler/minio.py +++ b/python/sandcrawler/minio.py @@ -17,8 +17,8 @@ class SandcrawlerMinioClient(object): Example config: host="localhost:9000", - access_key=os.environ['MINIO_ACCESS_KEY'], - secret_key=os.environ['MINIO_SECRET_KEY'], + access_key=os.environ['SANDCRAWLER_BLOB_ACCESS_KEY'], + secret_key=os.environ['SANDCRAWLER_BLOB_SECRET_KEY'], """ self.mc = minio.Minio( host_url, diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py index fbc5273..aa05195 100644 --- a/python/sandcrawler/persist.py +++ b/python/sandcrawler/persist.py @@ -452,9 +452,11 @@ class PersistPdfTextWorker(SandcrawlerWorker): class PersistThumbnailWorker(SandcrawlerWorker): """ - Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL table. + Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL + table. - This worker *must* be used with raw kakfa mode. + This worker *must* be used with raw kafka mode; thumbnails are *not* + wrapped in JSON like most sandcrawler kafka messages. """ def __init__(self, **kwargs): |