aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/minio.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/minio.py')
-rw-r--r--python/sandcrawler/minio.py59
1 files changed, 59 insertions, 0 deletions
diff --git a/python/sandcrawler/minio.py b/python/sandcrawler/minio.py
new file mode 100644
index 0000000..e6ebe41
--- /dev/null
+++ b/python/sandcrawler/minio.py
@@ -0,0 +1,59 @@
+
+import os
+import minio
+
+
class SandcrawlerMinioClient(object):
    """Small wrapper around ``minio.Minio`` that stores blobs under SHA-1 based paths."""

    def __init__(self, host, access_key, secret_key, default_bucket=None):
        """
        host is minio connection string (host:port)
        access and secret key are as expected
        default_bucket can be supplied so that it doesn't need to be repeated for each function call

        Example config:

            host="localhost:9000",
            access_key=os.environ['MINIO_ACCESS_KEY'],
            secret_key=os.environ['MINIO_SECRET_KEY'],
        """
        # NOTE: secure=False is hard-coded, so the client is always created
        # with the SDK's "secure" flag turned off.
        self.mc = minio.Minio(
            host,
            access_key=access_key,
            secret_key=secret_key,
            secure=False,
        )
        self.default_bucket = default_bucket

    def upload_blob(self, folder, blob, sha1hex=None, extension="", prefix="", bucket=None):
        """
        Upload a blob to a SHA-1 derived path and return where it was stored.

        blob should be bytes; a str is accepted and encoded as UTF-8 first.
        sha1hex is assumed to be sha1 of the blob itself; if not supplied it
        will be calculated.
        extension is appended verbatim to the file name (include the leading
        dot yourself); prefix is prepended verbatim to the whole path.
        bucket overrides the default_bucket given at construction time.

        Files are stored in a top-level folder, then in two levels of
        sub-directory based on sha1, then the filename is SHA1 with an
        optional file extension:

            {prefix}{folder}/{sha1[0:2]}/{sha1[2:4]}/{sha1}{extension}

        Returns a (bucket, obj_path) tuple.

        Raises TypeError if blob is neither bytes nor str, and ValueError if
        no bucket was passed and no default bucket is configured.
        """
        # Imported here so this method does not depend on the module-level
        # import list.
        import hashlib
        import io

        if isinstance(blob, str):
            blob = blob.encode('utf-8')
        if not isinstance(blob, bytes):
            raise TypeError(
                "blob must be bytes or str, not {}".format(type(blob).__name__)
            )

        if not sha1hex:
            sha1hex = hashlib.sha1(blob).hexdigest()

        obj_path = "{}{}/{}/{}/{}{}".format(
            prefix,
            folder,
            sha1hex[0:2],
            sha1hex[2:4],
            sha1hex,
            extension,
        )

        if not bucket:
            bucket = self.default_bucket
        if not bucket:
            raise ValueError("no bucket supplied and no default_bucket configured")

        # Upload to the resolved bucket (not unconditionally the default one),
        # and hand the SDK a readable stream rather than raw bytes.
        self.mc.put_object(
            bucket,
            obj_path,
            io.BytesIO(blob),
            len(blob),
        )
        return (bucket, obj_path)