Diffstat (limited to 'fatcat_scholar/search.py')
-rw-r--r--	fatcat_scholar/search.py	70
1 file changed, 40 insertions(+), 30 deletions(-)
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index d29e03b..5a61f53 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -1,4 +1,3 @@
-
 """
 Helpers to make elasticsearch queries.
 """
@@ -17,6 +16,7 @@ from typing import List, Dict, Tuple, Optional, Any, Sequence
 # i18n note: the use of gettext below doesn't actually do the translation here,
 # it just ensures that the strings are caught by babel for translation later
 
+
 class FulltextQuery(BaseModel):
     q: Optional[str] = None
     limit: Optional[int] = None
@@ -76,31 +76,42 @@ class FulltextHits(BaseModel):
     offset: int
     limit: int
     deep_page_limit: int
-    query_time_ms: int 
+    query_time_ms: int
     results: List[Any]
 
 
-def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
+def do_fulltext_search(
+    query: FulltextQuery, deep_page_limit: int = 2000
+) -> FulltextHits:
     es_client = elasticsearch.Elasticsearch(settings.ELASTICSEARCH_BACKEND)
     search = Search(using=es_client, index=settings.ELASTICSEARCH_FULLTEXT_INDEX)
 
     # Convert raw DOIs to DOI queries
-    if query.q and len(query.q.split()) == 1 and query.q.startswith("10.") and query.q.count("/") >= 1:
+    if (
+        query.q
+        and len(query.q.split()) == 1
+        and query.q.startswith("10.")
+        and query.q.count("/") >= 1
+    ):
         search = search.filter("terms", doi=query.q)
         query.q = "*"
 
     # type filters
     if query.filter_type == "papers":
-        search = search.filter("terms", type=[ "article-journal", "paper-conference", "chapter", ])
+        search = search.filter(
+            "terms", type=["article-journal", "paper-conference", "chapter",]
+        )
     elif query.filter_type == "reports":
-        search = search.filter("terms", type=[ "report", "standard", ])
+        search = search.filter("terms", type=["report", "standard",])
     elif query.filter_type == "datasets":
-        search = search.filter("terms", type=[ "dataset", "software", ])
+        search = search.filter("terms", type=["dataset", "software",])
     elif query.filter_type == "everything" or query.filter_type == None:
         pass
     else:
-        raise ValueError(f"Unknown 'filter_type' parameter value: '{query.filter_type}'")
+        raise ValueError(
+            f"Unknown 'filter_type' parameter value: '{query.filter_type}'"
+        )
 
     # time filters
     if query.filter_time == "past_week":
@@ -111,7 +122,9 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
         # the later to catch papers which don't have release_date defined
         year_ago_date = str(datetime.date.today() - datetime.timedelta(days=365))
         this_year = datetime.date.today().year
-        search = search.filter(Q("range", date=dict(gte=year_ago_date)) | Q("term", year=this_year))
+        search = search.filter(
+            Q("range", date=dict(gte=year_ago_date)) | Q("term", year=this_year)
+        )
     elif query.filter_time == "since_2000":
         search = search.filter("range", year=dict(gte=2000))
     elif query.filter_time == "before_1925":
@@ -119,7 +132,9 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
     elif query.filter_time == "all_time" or query.filter_time == None:
         pass
     else:
-        raise ValueError(f"Unknown 'filter_time' parameter value: '{query.filter_time}'")
+        raise ValueError(
+            f"Unknown 'filter_time' parameter value: '{query.filter_time}'"
+        )
 
     # availability filters
     if query.filter_availability == "oa":
@@ -129,13 +144,15 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
     elif query.filter_availability == "fulltext" or query.filter_availability == None:
         search = search.filter("terms", access_type=["wayback", "ia_file", "ia_sim"])
     else:
-        raise ValueError(f"Unknown 'filter_availability' parameter value: '{query.filter_availability}'")
+        raise ValueError(
+            f"Unknown 'filter_availability' parameter value: '{query.filter_availability}'"
+        )
 
     # we combined several queries to improve scoring.
 
     # this query use the fancy built-in query string parser
     basic_fulltext = Q(
-        'query_string',
+        "query_string",
         query=query.q,
         default_operator="AND",
         analyze_wildcard=True,
@@ -150,12 +167,9 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
             "everything",
         ],
     )
-    has_fulltext = Q(
-        'terms',
-        access_type=["ia_sim", "ia_file", "wayback"],
-    )
+    has_fulltext = Q("terms", access_type=["ia_sim", "ia_file", "wayback"],)
     poor_metadata = Q(
-        'bool',
+        "bool",
         should=[
             # if these fields aren't set, metadata is poor. The more that do
             # not exist, the stronger the signal.
@@ -168,11 +182,7 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
 
     search = search.query(
         "boosting",
-        positive=Q(
-            "bool",
-            must=basic_fulltext,
-            should=[has_fulltext],
-        ),
+        positive=Q("bool", must=basic_fulltext, should=[has_fulltext],),
         negative=poor_metadata,
         negative_boost=0.5,
     )
@@ -201,15 +211,15 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
         # Avoid deep paging problem.
         offset = deep_page_limit
 
-    search = search[offset:offset+limit]
+    search = search[offset : offset + limit]
 
     try:
         resp = search.execute()
     except elasticsearch.exceptions.RequestError as e:
         # this is a "user" error
         print("elasticsearch 400: " + str(e.info), file=sys.stderr)
-        if e.info.get('error', {}).get('root_cause', {}):
-            raise ValueError(str(e.info['error']['root_cause'][0].get('reason')))
+        if e.info.get("error", {}).get("root_cause", {}):
+            raise ValueError(str(e.info["error"]["root_cause"][0].get("reason")))
         else:
             raise ValueError(str(e.info))
     except elasticsearch.exceptions.TransportError as e:
@@ -221,12 +231,12 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
     results = []
     for h in resp:
         r = h._d_
-        #print(json.dumps(h.meta._d_, indent=2))
-        r['_highlights'] = []
-        if 'highlight' in dir(h.meta):
+        # print(json.dumps(h.meta._d_, indent=2))
+        r["_highlights"] = []
+        if "highlight" in dir(h.meta):
             highlights = h.meta.highlight._d_
             for k in highlights:
-                r['_highlights'] += highlights[k]
+                r["_highlights"] += highlights[k]
         results.append(r)
 
     for h in results:
@@ -235,7 +245,7 @@ def do_fulltext_search(query: FulltextQuery, deep_page_limit: int = 2000) -> FulltextHits:
         # "Crimes against Unicode"; production workaround
         for key in h:
             if type(h[key]) is str:
-                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+                h[key] = h[key].encode("utf8", "ignore").decode("utf8")
 
     return FulltextHits(
         count_returned=len(results),
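Note (not part of the commit): the scoring logic touched by this reformat combines a "query_string" match with availability and metadata-quality signals via an elasticsearch "boosting" query. A minimal standalone sketch of that construction using elasticsearch_dsl, assuming a local backend URL, a placeholder index name, a placeholder user query, an abbreviated fields list, and illustrative poor_metadata clauses (the real list of fields differs):

import elasticsearch
from elasticsearch_dsl import Q, Search

# Sub-queries mirroring the diff: a query-string match, a fulltext-availability
# signal, and a "poor metadata" penalty query.
basic_fulltext = Q(
    "query_string",
    query="coffee roasting",  # placeholder user query
    default_operator="AND",
    analyze_wildcard=True,
    fields=["everything"],  # the diff elides the full field list; abbreviated here
)
has_fulltext = Q("terms", access_type=["ia_sim", "ia_file", "wayback"])
poor_metadata = Q(
    "bool",
    should=[
        # each missing field strengthens the "poor metadata" signal
        Q("bool", must_not=Q("exists", field="title")),
        Q("bool", must_not=Q("exists", field="year")),
    ],
)

es_client = elasticsearch.Elasticsearch("http://localhost:9200")  # placeholder URL
search = Search(using=es_client, index="scholar_fulltext")  # placeholder index

# "boosting" keeps all positive matches but multiplies the score of
# poor-metadata hits by negative_boost (0.5), rather than filtering them out
search = search.query(
    "boosting",
    positive=Q("bool", must=basic_fulltext, should=[has_fulltext]),
    negative=poor_metadata,
    negative_boost=0.5,
)
print(search.to_dict())  # inspect the generated query body without executing it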