diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-12-24 16:41:09 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-02 18:12:58 -0800 |
commit | 03b04aabc9d9b63ff54a80f52590b619aee06159 (patch) | |
tree | 6121f29f9234fcb3a41a62120d614e9a2420542c /python/sandcrawler/minio.py | |
parent | a8cd91e6f6fb6dafac35f8c239113b55b2230b13 (diff) | |
download | sandcrawler-03b04aabc9d9b63ff54a80f52590b619aee06159.tar.gz sandcrawler-03b04aabc9d9b63ff54a80f52590b619aee06159.zip |
start work on DB connector and minio client
Diffstat (limited to 'python/sandcrawler/minio.py')
-rw-r--r-- | python/sandcrawler/minio.py | 59 |
1 files changed, 59 insertions, 0 deletions
# python/sandcrawler/minio.py -- new file (mode 100644, blob e6ebe41)

import hashlib
import io
import os


class SandcrawlerMinioClient(object):
    """Small wrapper around a minio client for storing content-addressed blobs."""

    def __init__(self, host, access_key, secret_key, default_bucket=None):
        """
        host is minio connection string (host:port)
        access and secret key are as expected
        default_bucket can be supplied so that it doesn't need to be repeated
        for each function call

        Example config:

            host="localhost:9000",
            access_key=os.environ['MINIO_ACCESS_KEY'],
            secret_key=os.environ['MINIO_SECRET_KEY'],
        """
        # Imported here (not at module level) so that this module can be
        # imported, and object paths computed, without the third-party
        # client library being installed.
        import minio

        self.mc = minio.Minio(
            host,
            access_key=access_key,
            secret_key=secret_key,
            # NOTE: connection is plain HTTP, not TLS
            secure=False,
        )
        self.default_bucket = default_bucket

    def upload_blob(self, folder, blob, sha1hex=None, extension="", prefix="", bucket=None):
        """
        Uploads blob to a path in the given bucket.

        Files are stored in a top-level folder, then in two levels of
        sub-directory based on sha1, then the filename is SHA1 with an
        optional file extension:

            {prefix}{folder}/{sha1[0:2]}/{sha1[2:4]}/{sha1}{extension}

        Arguments:
            folder: top-level folder name inside the bucket
            blob: content to upload; should be bytes (a str is encoded as
                UTF-8 before upload)
            sha1hex: assumed to be the hex SHA-1 of the blob itself; if not
                supplied it will be calculated
            extension: appended verbatim to the filename (include the
                leading dot, eg ".pdf")
            prefix: prepended verbatim to the object path (include any
                trailing slash)
            bucket: bucket to upload to; falls back to the default_bucket
                passed to the constructor

        Returns a (bucket, obj_path) tuple describing where the blob went.

        Raises TypeError if blob is neither bytes nor str, and ValueError if
        no bucket was given and no default bucket is configured.
        """
        if isinstance(blob, str):
            blob = blob.encode('utf-8')
        if not isinstance(blob, bytes):
            raise TypeError(
                "blob must be bytes or str, not {}".format(type(blob).__name__))
        if not sha1hex:
            sha1hex = hashlib.sha1(blob).hexdigest()
        obj_path = "{}{}/{}/{}/{}{}".format(
            prefix,
            folder,
            sha1hex[0:2],
            sha1hex[2:4],
            sha1hex,
            extension,
        )
        if not bucket:
            bucket = self.default_bucket
        if not bucket:
            raise ValueError("no bucket supplied and no default_bucket configured")
        self.mc.put_object(
            # use the resolved bucket, so an explicit bucket= argument is
            # honored and matches what is returned to the caller
            bucket,
            obj_path,
            # put_object reads from a file-like object, not raw bytes
            io.BytesIO(blob),
            len(blob),
        )
        return (bucket, obj_path)