diff options
-rw-r--r-- | fatcat_scholar/issue_db.py | 20 | ||||
-rw-r--r-- | fatcat_scholar/schema.py | 3 | ||||
-rw-r--r-- | fatcat_scholar/sim_pipeline.py | 4 | ||||
-rw-r--r-- | fatcat_scholar/web.py | 14 | ||||
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 14 |
5 files changed, 28 insertions, 27 deletions
diff --git a/fatcat_scholar/issue_db.py b/fatcat_scholar/issue_db.py index 829560b..fed12a1 100644 --- a/fatcat_scholar/issue_db.py +++ b/fatcat_scholar/issue_db.py @@ -3,7 +3,7 @@ import json import sqlite3 import argparse from dataclasses import dataclass -from typing import List, Dict, Optional, Any, Sequence +from typing import List, Dict, Optional, Any, Sequence, Tuple import fatcat_openapi_client import elasticsearch from elasticsearch_dsl import Search @@ -22,7 +22,7 @@ class SimPubRow: container_ident: Optional[str] wikidata_qid: Optional[str] - def tuple(self): + def tuple(self) -> Tuple: return ( self.sim_pubid, self.pub_collection, @@ -67,7 +67,7 @@ class SimIssueRow: last_page: Optional[int] release_count: Optional[int] - def tuple(self): + def tuple(self) -> Tuple: return ( self.issue_item, self.sim_pubid, @@ -101,7 +101,7 @@ class ReleaseCountsRow: year: Optional[int] volume: Optional[str] - def tuple(self): + def tuple(self) -> Tuple: return ( self.sim_pubid, self.year, @@ -147,7 +147,7 @@ def es_container_aggs(es_client: Any, container_id: str) -> List[Dict[str, Any]] class IssueDB: - def __init__(self, db_file): + def __init__(self, db_file: str): """ To create a temporary database, pass ":memory:" as db_file """ @@ -155,7 +155,7 @@ class IssueDB: self._pubid2container_map: Dict[str, Optional[str]] = dict() self._container2pubid_map: Dict[str, Optional[str]] = dict() - def init_db(self): + def init_db(self) -> None: self.db.executescript( """ PRAGMA main.page_size = 4096; @@ -240,7 +240,7 @@ class IssueDB: return None return SimPubRow.from_tuple(row[0]) - def load_pubs(self, json_lines: Sequence[str], api: Any): + def load_pubs(self, json_lines: Sequence[str], api: Any) -> None: """ Reads a file (or some other iterator) of JSON lines, parses them into a dict, then inserts rows. @@ -274,7 +274,7 @@ class IssueDB: cur.close() self.db.commit() - def load_issues(self, json_lines: Sequence[str], es_client: Any): + def load_issues(self, json_lines: Sequence[str], es_client: Any) -> None: """ Reads a file (or some other iterator) of JSON lines, parses them into a dict, then inserts rows. @@ -337,7 +337,7 @@ class IssueDB: cur.close() self.db.commit() - def load_counts(self, es_client: Any): + def load_counts(self, es_client: Any) -> None: all_pub_containers = list( self.db.execute( "SELECT sim_pubid, container_ident FROM sim_pub WHERE container_ident IS NOT NULL;" @@ -359,7 +359,7 @@ class IssueDB: self.db.commit() -def main(): +def main() -> None: """ Run this command like: diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index d3cc1fe..725764e 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -346,8 +346,7 @@ def es_biblio_from_release(release: ReleaseEntity) -> ScholarBiblio: license_slug=release.license_slug, publisher=publisher, container_name=container_name, - # XXX: prod hack? container_original_name=container_original_name, - container_original_name=None, + container_original_name=container_original_name, container_ident=container_ident, container_type=container_type, container_issnl=container_issnl, diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py index 3541bdc..f99471f 100644 --- a/fatcat_scholar/sim_pipeline.py +++ b/fatcat_scholar/sim_pipeline.py @@ -110,7 +110,7 @@ class SimPipeline: issue_item_metadata=truncate_issue_meta(issue_meta), ) - def run_issue_db(self, limit: int = None): + def run_issue_db(self, limit: int = None) -> None: count = 0 self.issue_db.db.row_factory = sqlite3.Row cur = self.issue_db.db.cursor() @@ -157,7 +157,7 @@ class SimPipeline: break -def main(): +def main() -> None: """ Run this command like: diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index 8b55f75..2d2678a 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -9,7 +9,7 @@ import babel.support from fastapi import FastAPI, APIRouter, Request, Depends from fastapi.staticfiles import StaticFiles from dynaconf import settings -from typing import Optional, List, Any +from typing import Optional, Any from fatcat_scholar.hacks import Jinja2Templates from fatcat_scholar.search import do_fulltext_search, FulltextQuery, FulltextHits @@ -71,20 +71,20 @@ web = APIRouter() def locale_gettext(translations: Any) -> Any: - def gt(s): + def gt(s): # noqa: ANN001,ANN201 return translations.ugettext(s) return gt def locale_ngettext(translations: Any) -> Any: - def ngt(s, n): + def ngt(s, n): # noqa: ANN001,ANN201 return translations.ungettext(s) return ngt -def load_i18n_templates(): +def load_i18n_templates() -> Any: """ This is a hack to work around lack of per-request translation (babel/gettext) locale switching in FastAPI and Starlette. Flask (and @@ -111,7 +111,7 @@ def load_i18n_templates(): ) # remove a lot of whitespace in HTML output with these configs templates.env.trim_blocks = True - templates.env.istrip_blocks = True + templates.env.lstrip_blocks = True # pass-through application settings to be available in templates templates.env.globals["settings"] = settings d[lang_opt] = templates @@ -126,7 +126,7 @@ async def web_home( request: Request, lang: LangPrefix = Depends(LangPrefix), content: ContentNegotiation = Depends(ContentNegotiation), -): +) -> Any: if content.mimetype == "application/json": return await home() return i18n_templates[lang.code].TemplateResponse( @@ -136,7 +136,7 @@ async def web_home( @web.get("/about", include_in_schema=False) -async def web_about(request: Request, lang: LangPrefix = Depends(LangPrefix)): +async def web_about(request: Request, lang: LangPrefix = Depends(LangPrefix)) -> Any: return i18n_templates[lang.code].TemplateResponse( "about.html", {"request": request, "locale": lang.code, "lang_prefix": lang.prefix}, diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index 303f9b1..accbc1d 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -1,12 +1,14 @@ import os import io import sys -import minio -import requests import argparse from typing import List, Dict, Tuple, Optional, Any, Sequence -from fatcat_openapi_client import ReleaseEntity, FileEntity + +import minio +import requests import internetarchive +from dynaconf import settings +from fatcat_openapi_client import ReleaseEntity, FileEntity from fatcat_scholar.api_entities import * from fatcat_scholar.djvu import djvu_extract_leaf_texts @@ -41,7 +43,7 @@ def parse_pages(raw: str) -> Tuple[Optional[int], Optional[int]]: return (first, last) -def test_parse_pages(): +def test_parse_pages() -> None: assert parse_pages("479-89") == (479, 489) assert parse_pages("466-7") == (466, 467) assert parse_pages("466-501") == (466, 501) @@ -331,7 +333,7 @@ class WorkPipeline: sim_fulltext=sim_fulltext, ) - def run_releases(self, release_stream: Sequence[str]): + def run_releases(self, release_stream: Sequence[str]) -> None: """ Iterates over the stream of releases, which are expected to be grouped (sorted) by work_ident. @@ -365,7 +367,7 @@ class WorkPipeline: print(ib.json()) -def main(): +def main() -> None: """ Run this command like: |