diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:14 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:16 -0800 |
commit | 3adcaf9802928346dda597cefd4b66b2e62fa942 (patch) | |
tree | dc7794d8d142a73a41fbad4221d058dc3567a2e4 /python/sandcrawler | |
parent | e99d9f2fddcb8b52ba52128b290ec5e0f367392f (diff) | |
download | sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.tar.gz sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.zip |
refactor 'minio' to 'seaweedfs'; and BLOB env vars
This goes along with changes to ansible deployment to use the correct
key names and values.
Diffstat (limited to 'python/sandcrawler')
-rw-r--r-- | python/sandcrawler/minio.py | 4 | ||||
-rw-r--r-- | python/sandcrawler/persist.py | 6 |
2 files changed, 6 insertions, 4 deletions
diff --git a/python/sandcrawler/minio.py b/python/sandcrawler/minio.py
index 8b02211..c7deea1 100644
--- a/python/sandcrawler/minio.py
+++ b/python/sandcrawler/minio.py
@@ -17,8 +17,8 @@ class SandcrawlerMinioClient(object):
         Example config:
 
             host="localhost:9000",
-            access_key=os.environ['MINIO_ACCESS_KEY'],
-            secret_key=os.environ['MINIO_SECRET_KEY'],
+            access_key=os.environ['SANDCRAWLER_BLOB_ACCESS_KEY'],
+            secret_key=os.environ['SANDCRAWLER_BLOB_ACCESS_KEY'],
         """
         self.mc = minio.Minio(
             host_url,
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index fbc5273..aa05195 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -452,9 +452,11 @@ class PersistPdfTextWorker(SandcrawlerWorker):
 
 class PersistThumbnailWorker(SandcrawlerWorker):
     """
-    Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL table.
+    Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL
+    table.
 
-    This worker *must* be used with raw kakfa mode.
+    This worker *must* be used with raw kakfa mode; thumbnails are *not*
+    wrapped in JSON like most sandcrawler kafka messages.
     """
 
     def __init__(self, **kwargs):