Diffstat (limited to 'fatcat_scholar')
-rw-r--r--  fatcat_scholar/issue_db.py       | 20
-rw-r--r--  fatcat_scholar/schema.py         |  3
-rw-r--r--  fatcat_scholar/sim_pipeline.py   |  4
-rw-r--r--  fatcat_scholar/web.py            | 14
-rw-r--r--  fatcat_scholar/work_pipeline.py  | 14
5 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py
index 829560b..fed12a1 100644
--- a/fatcat_scholar/issue_db.py
+++ b/fatcat_scholar/issue_db.py
@@ -3,7 +3,7 @@ import json
import sqlite3
import argparse
from dataclasses import dataclass
-from typing import List, Dict, Optional, Any, Sequence
+from typing import List, Dict, Optional, Any, Sequence, Tuple
import fatcat_openapi_client
import elasticsearch
from elasticsearch_dsl import Search
@@ -22,7 +22,7 @@ class SimPubRow:
container_ident: Optional[str]
wikidata_qid: Optional[str]
- def tuple(self):
+ def tuple(self) -> Tuple:
return (
self.sim_pubid,
self.pub_collection,
@@ -67,7 +67,7 @@ class SimIssueRow:
last_page: Optional[int]
release_count: Optional[int]
- def tuple(self):
+ def tuple(self) -> Tuple:
return (
self.issue_item,
self.sim_pubid,
@@ -101,7 +101,7 @@ class ReleaseCountsRow:
year: Optional[int]
volume: Optional[str]
- def tuple(self):
+ def tuple(self) -> Tuple:
return (
self.sim_pubid,
self.year,
@@ -147,7 +147,7 @@ def es_container_aggs(es_client: Any, container_id: str) -> List[Dict[str, Any]]
class IssueDB:
- def __init__(self, db_file):
+ def __init__(self, db_file: str):
"""
To create a temporary database, pass ":memory:" as db_file
"""
@@ -155,7 +155,7 @@ class IssueDB:
self._pubid2container_map: Dict[str, Optional[str]] = dict()
self._container2pubid_map: Dict[str, Optional[str]] = dict()
- def init_db(self):
+ def init_db(self) -> None:
self.db.executescript(
"""
PRAGMA main.page_size = 4096;
@@ -240,7 +240,7 @@ class IssueDB:
return None
return SimPubRow.from_tuple(row[0])
- def load_pubs(self, json_lines: Sequence[str], api: Any):
+ def load_pubs(self, json_lines: Sequence[str], api: Any) -> None:
"""
Reads a file (or some other iterator) of JSON lines, parses them into a
dict, then inserts rows.
@@ -274,7 +274,7 @@ class IssueDB:
cur.close()
self.db.commit()
- def load_issues(self, json_lines: Sequence[str], es_client: Any):
+ def load_issues(self, json_lines: Sequence[str], es_client: Any) -> None:
"""
Reads a file (or some other iterator) of JSON lines, parses them into a
dict, then inserts rows.
@@ -337,7 +337,7 @@ class IssueDB:
cur.close()
self.db.commit()
- def load_counts(self, es_client: Any):
+ def load_counts(self, es_client: Any) -> None:
all_pub_containers = list(
self.db.execute(
"SELECT sim_pubid, container_ident FROM sim_pub WHERE container_ident IS NOT NULL;"
@@ -359,7 +359,7 @@ class IssueDB:
self.db.commit()
-def main():
+def main() -> None:
"""
Run this command like:
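
Note on the issue_db.py hunks above: they are pure type-annotation work, adding bare -> Tuple return types to the row dataclasses and -> None to mutating methods, enough for a strict checker such as mypy to pass the module. A minimal sketch of the same pattern (the class and fields here are illustrative, not from this commit):

    from dataclasses import dataclass
    from typing import Optional, Tuple

    @dataclass
    class ExampleRow:
        ident: str
        year: Optional[int]

        # a bare Tuple return type satisfies the checker when element
        # types vary by field; Tuple[str, Optional[int]] would be the
        # stricter spelling
        def tuple(self) -> Tuple:
            return (self.ident, self.year)

    assert ExampleRow("abc", 2020).tuple() == ("abc", 2020)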
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index d3cc1fe..725764e 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -346,8 +346,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio:
license_slug=release.license_slug,
publisher=publisher,
container_name=container_name,
- # XXX: prod hack? container_original_name=container_original_name,
- container_original_name=None,
+ container_original_name=container_original_name,
container_ident=container_ident,
container_type=container_type,
container_issnl=container_issnl,
diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py
index 3541bdc..f99471f 100644
--- a/fatcat_scholar/sim_pipeline.py
+++ b/fatcat_scholar/sim_pipeline.py
@@ -110,7 +110,7 @@ class SimPipeline:
issue_item_metadata=truncate_issue_meta(issue_meta),
)
- def run_issue_db(self, limit: int = None):
+ def run_issue_db(self, limit: int = None) -> None:
count = 0
self.issue_db.db.row_factory = sqlite3.Row
cur = self.issue_db.db.cursor()
@@ -157,7 +157,7 @@ class SimPipeline:
break
-def main():
+def main() -> None:
"""
Run this command like:
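
Note on the sim_pipeline.py hunk above: run_issue_db gains -> None but keeps the limit: int = None default, a spelling that only type-checks under mypy's implicit-Optional behavior (the default at the time). The explicit form, sketched with a stand-in loop body:

    from typing import Optional

    def run_issue_db(limit: Optional[int] = None) -> None:
        processed = 0
        for row in []:  # stand-in for the sqlite cursor iteration
            processed += 1
            if limit is not None and processed >= limit:
                break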
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 8b55f75..2d2678a 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -9,7 +9,7 @@ import babel.support
from fastapi import FastAPI, APIRouter, Request, Depends
from fastapi.staticfiles import StaticFiles
from dynaconf import settings
-from typing import Optional, List, Any
+from typing import Optional, Any
from fatcat_scholar.hacks import Jinja2Templates
from fatcat_scholar.search import do_fulltext_search, FulltextQuery, FulltextHits
@@ -71,20 +71,20 @@ web = APIRouter()
def locale_gettext(translations: Any) -> Any:
- def gt(s):
+ def gt(s): # noqa: ANN001,ANN201
return translations.ugettext(s)
return gt
def locale_ngettext(translations: Any) -> Any:
- def ngt(s, n):
+ def ngt(s, n): # noqa: ANN001,ANN201
return translations.ungettext(s)
return ngt
-def load_i18n_templates():
+def load_i18n_templates() -> Any:
"""
This is a hack to work around lack of per-request translation
(babel/gettext) locale switching in FastAPI and Starlette. Flask (and
@@ -111,7 +111,7 @@ def load_i18n_templates():
)
# remove a lot of whitespace in HTML output with these configs
templates.env.trim_blocks = True
- templates.env.istrip_blocks = True
+ templates.env.lstrip_blocks = True
# pass-through application settings to be available in templates
templates.env.globals["settings"] = settings
d[lang_opt] = templates
@@ -126,7 +126,7 @@ async def web_home(
request: Request,
lang: LangPrefix = Depends(LangPrefix),
content: ContentNegotiation = Depends(ContentNegotiation),
-):
+) -> Any:
if content.mimetype == "application/json":
return await home()
return i18n_templates[lang.code].TemplateResponse(
@@ -136,7 +136,7 @@ async def web_home(
@web.get("/about", include_in_schema=False)
-async def web_about(request: Request, lang: LangPrefix = Depends(LangPrefix)):
+async def web_about(request: Request, lang: LangPrefix = Depends(LangPrefix)) -> Any:
return i18n_templates[lang.code].TemplateResponse(
"about.html",
{"request": request, "locale": lang.code, "lang_prefix": lang.prefix},
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index 303f9b1..accbc1d 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -1,12 +1,14 @@
import os
import io
import sys
-import minio
-import requests
import argparse
from typing import List, Dict, Tuple, Optional, Any, Sequence
-from fatcat_openapi_client import ReleaseEntity, FileEntity
+
+import minio
+import requests
import internetarchive
+from dynaconf import settings
+from fatcat_openapi_client import ReleaseEntity, FileEntity
from fatcat_scholar.api_entities import *
from fatcat_scholar.djvu import djvu_extract_leaf_texts
@@ -41,7 +43,7 @@ def parse_pages(raw: str) -> Tuple[Optional[int], Optional[int]]:
return (first, last)
-def test_parse_pages():
+def test_parse_pages() -> None:
assert parse_pages("479-89") == (479, 489)
assert parse_pages("466-7") == (466, 467)
assert parse_pages("466-501") == (466, 501)
@@ -331,7 +333,7 @@ class WorkPipeline:
sim_fulltext=sim_fulltext,
)
- def run_releases(self, release_stream: Sequence[str]):
+ def run_releases(self, release_stream: Sequence[str]) -> None:
"""
Iterates over the stream of releases, which are expected to be grouped
(sorted) by work_ident.
@@ -365,7 +367,7 @@ class WorkPipeline:
print(ib.json())
-def main():
+def main() -> None:
"""
Run this command like:
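
Note on the work_pipeline.py hunks above: the imports are regrouped into the conventional stdlib / third-party / local blocks, and the test helper gains -> None. The test_parse_pages asserts also pin down the page-range parsing: a short second number inherits the leading digits of the first. A sketch that satisfies exactly the asserts shown (the real parse_pages may handle more edge cases):

    from typing import Optional, Tuple

    def parse_pages(raw: str) -> Tuple[Optional[int], Optional[int]]:
        first_raw, _, last_raw = raw.partition("-")
        if not first_raw.isdigit():
            return (None, None)
        first = int(first_raw)
        if not last_raw.isdigit():
            return (first, first)
        # "479-89": expand the short form using the first page's prefix
        if len(last_raw) < len(first_raw):
            last_raw = first_raw[: len(first_raw) - len(last_raw)] + last_raw
        return (first, int(last_raw))

    assert parse_pages("479-89") == (479, 489)
    assert parse_pages("466-7") == (466, 467)
    assert parse_pages("466-501") == (466, 501)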