diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:14 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 19:12:16 -0800 |
commit | 3adcaf9802928346dda597cefd4b66b2e62fa942 (patch) | |
tree | dc7794d8d142a73a41fbad4221d058dc3567a2e4 /python/sandcrawler/persist.py | |
parent | e99d9f2fddcb8b52ba52128b290ec5e0f367392f (diff) | |
download | sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.tar.gz sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.zip |
refactor 'minio' to 'seaweedfs'; and BLOB env vars
This goes along with changes to ansible deployment to use the correct
key names and values.
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- | python/sandcrawler/persist.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index fbc5273..aa05195 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -452,9 +452,11 @@ class PersistPdfTextWorker(SandcrawlerWorker):
 
 class PersistThumbnailWorker(SandcrawlerWorker):
     """
-    Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL table.
+    Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL
+    table.
 
-    This worker *must* be used with raw kakfa mode.
+    This worker *must* be used with raw kakfa mode; thumbnails are *not*
+    wrapped in JSON like most sandcrawler kafka messages.
     """
 
     def __init__(self, **kwargs):