aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/persist.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-11-03 19:12:14 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-11-03 19:12:16 -0800
commit: 3adcaf9802928346dda597cefd4b66b2e62fa942 (patch)
tree: dc7794d8d142a73a41fbad4221d058dc3567a2e4 /python/sandcrawler/persist.py
parent: e99d9f2fddcb8b52ba52128b290ec5e0f367392f (diff)
download: sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.tar.gz
sandcrawler-3adcaf9802928346dda597cefd4b66b2e62fa942.zip
refactor 'minio' to 'seaweedfs'; and BLOB env vars
This goes along with changes to ansible deployment to use the correct key names and values.
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- python/sandcrawler/persist.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index fbc5273..aa05195 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -452,9 +452,11 @@ class PersistPdfTextWorker(SandcrawlerWorker):
class PersistThumbnailWorker(SandcrawlerWorker):
"""
- Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL table.
+ Pushes text file to blob store (S3/seaweed/minio) and PDF metadata to SQL
+ table.
- This worker *must* be used with raw kakfa mode.
+ This worker *must* be used with raw kakfa mode; thumbnails are *not*
+ wrapped in JSON like most sandcrawler kafka messages.
"""
def __init__(self, **kwargs):