summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/sandcrawler.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-03 22:46:16 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-03 22:46:16 -0700
commit: 62d3299471b7b4229ae63e21e9d45ef7efde4348 (patch)
tree: fc0d04531ce4c81224723bd3646c88f74c7c42a2 /fatcat_scholar/sandcrawler.py
parent: 58688676ee2d96d9d7006e2fd8a14d47f271aa8f (diff)
download: fatcat-scholar-62d3299471b7b4229ae63e21e9d45ef7efde4348.tar.gz
fatcat-scholar-62d3299471b7b4229ae63e21e9d45ef7efde4348.zip
reformat python code with black
Diffstat (limited to 'fatcat_scholar/sandcrawler.py')
-rw-r--r-- fatcat_scholar/sandcrawler.py 35
1 files changed, 14 insertions, 21 deletions
diff --git a/fatcat_scholar/sandcrawler.py b/fatcat_scholar/sandcrawler.py
index db6014f..408682f 100644
--- a/fatcat_scholar/sandcrawler.py
+++ b/fatcat_scholar/sandcrawler.py
@@ -1,16 +1,15 @@
-
import json
import minio
import requests
from typing import Dict, Optional, Any
-class SandcrawlerPostgrestClient():
+class SandcrawlerPostgrestClient:
def __init__(self, api_url: str):
self.api_url = api_url
-
+
def get_grobid(self, sha1: str) -> Optional[Dict[str, Any]]:
- resp = requests.get(self.api_url + "/grobid", params=dict(sha1hex='eq.'+sha1))
+ resp = requests.get(self.api_url + "/grobid", params=dict(sha1hex="eq." + sha1))
resp.raise_for_status()
resp_json = resp.json()
if resp_json:
@@ -20,8 +19,13 @@ class SandcrawlerPostgrestClient():
class SandcrawlerMinioClient(object):
-
- def __init__(self, host_url: str, access_key: Optional[str] = None, secret_key: Optional[str] = None, default_bucket: Optional[str] = "sandcrawler"):
+ def __init__(
+ self,
+ host_url: str,
+ access_key: Optional[str] = None,
+ secret_key: Optional[str] = None,
+ default_bucket: Optional[str] = "sandcrawler",
+ ):
"""
host is minio connection string (host:port)
access and secret key are as expected
@@ -34,10 +38,7 @@ class SandcrawlerMinioClient(object):
secret_key=os.environ['MINIO_SECRET_KEY'],
"""
self.mc = minio.Minio(
- host_url,
- access_key=access_key,
- secret_key=secret_key,
- secure=False,
+ host_url, access_key=access_key, secret_key=secret_key, secure=False,
)
self.default_bucket = default_bucket
@@ -48,14 +49,9 @@ class SandcrawlerMinioClient(object):
prefix = ""
assert len(sha1hex) == 40
obj_path = "{}{}/{}/{}/{}{}".format(
- prefix,
- folder,
- sha1hex[0:2],
- sha1hex[2:4],
- sha1hex,
- extension,
+ prefix, folder, sha1hex[0:2], sha1hex[2:4], sha1hex, extension,
)
- return obj_path
+ return obj_path
def get_blob(self, folder, sha1hex, extension="", prefix="", bucket=None):
"""
@@ -67,9 +63,6 @@ class SandcrawlerMinioClient(object):
if not bucket:
bucket = self.default_bucket
assert bucket
- blob = self.mc.get_object(
- bucket,
- obj_path,
- )
+ blob = self.mc.get_object(bucket, obj_path,)
# TODO: optionally verify SHA-1?
return blob.data