diff options
Diffstat (limited to 'fatcat_scholar/sandcrawler.py')
-rw-r--r-- | fatcat_scholar/sandcrawler.py | 35 |
1 files changed, 14 insertions, 21 deletions
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py index db6014f..408682f 100644 --- a/fatcat_scholar/sandcrawler.py +++ b/fatcat_scholar/sandcrawler.py @@ -1,16 +1,15 @@ - import json import minio import requests from typing import Dict, Optional, Any -class SandcrawlerPostgrestClient(): +class SandcrawlerPostgrestClient: def __init__(self, api_url: str): self.api_url = api_url - + def get_grobid(self, sha1: str) -> Optional[Dict[str, Any]]: - resp = requests.get(self.api_url + "/grobid", params=dict(sha1hex='eq.'+sha1)) + resp = requests.get(self.api_url + "/grobid", params=dict(sha1hex="eq." + sha1)) resp.raise_for_status() resp_json = resp.json() if resp_json: @@ -20,8 +19,13 @@ class SandcrawlerPostgrestClient(): class SandcrawlerMinioClient(object): - - def __init__(self, host_url: str, access_key: Optional[str] = None, secret_key: Optional[str] = None, default_bucket: Optional[str] = "sandcrawler"): + def __init__( + self, + host_url: str, + access_key: Optional[str] = None, + secret_key: Optional[str] = None, + default_bucket: Optional[str] = "sandcrawler", + ): """ host is minio connection string (host:port) access and secret key are as expected @@ -34,10 +38,7 @@ class SandcrawlerMinioClient(object): secret_key=os.environ['MINIO_SECRET_KEY'], """ self.mc = minio.Minio( - host_url, - access_key=access_key, - secret_key=secret_key, - secure=False, + host_url, access_key=access_key, secret_key=secret_key, secure=False, ) self.default_bucket = default_bucket @@ -48,14 +49,9 @@ class SandcrawlerMinioClient(object): prefix = "" assert len(sha1hex) == 40 obj_path = "{}{}/{}/{}/{}{}".format( - prefix, - folder, - sha1hex[0:2], - sha1hex[2:4], - sha1hex, - extension, + prefix, folder, sha1hex[0:2], sha1hex[2:4], sha1hex, extension, ) - return obj_path + return obj_path def get_blob(self, folder, sha1hex, extension="", prefix="", bucket=None): """ @@ -67,9 +63,6 @@ class SandcrawlerMinioClient(object): if not bucket: bucket = self.default_bucket assert bucket - blob = self.mc.get_object( - bucket, - obj_path, - ) + blob = self.mc.get_object(bucket, obj_path,) # TODO: optionally verify SHA-1? return blob.data |