diff options
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/transform.py | 5 | ||||
-rw-r--r-- | fatcat_scholar/web.py | 4 | ||||
-rw-r--r-- | fatcat_scholar/work_pipeline.py | 2 |
3 files changed, 6 insertions, 5 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 9088b16..bf24da9 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -13,14 +13,15 @@ from fatcat_scholar.schema import * from fatcat_scholar.config import settings, GIT_REVISION from fatcat_scholar.grobid2json import teixml2json -MAX_BODY_CHARS = 1024*1024 +MAX_BODY_CHARS = 1024 * 1024 + def es_fulltext_from_sim(sim: Dict[str, Any]) -> Optional[ScholarFulltext]: if not sim["page_texts"]: return None first_page = sim["page_texts"][0]["page_num"] issue_item = sim["issue_item"] - body="\n".join([p["raw_text"] for p in sim["page_texts"]]) + body = "\n".join([p["raw_text"] for p in sim["page_texts"]]) if body and len(body) > MAX_BODY_CHARS: body = body[MAX_BODY_CHARS:] return ScholarFulltext( diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index 8a1faa7..dce732b 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -99,7 +99,7 @@ class HitsModel(BaseModel): async def search(query: FulltextQuery = Depends(FulltextQuery)) -> FulltextHits: hits: Optional[FulltextHits] = None if query.q is None: - raise HTTPException(status_code=400, detail=f"Expected a 'q' query parameter") + raise HTTPException(status_code=400, detail="Expected a 'q' query parameter") try: hits = process_query(query) except ValueError as e: @@ -328,7 +328,7 @@ app.add_middleware( allow_origins=["*"], allow_credentials=False, allow_methods=["GET"], - allow_headers=[], # some defaults always enabled + allow_headers=[], # some defaults always enabled ) if settings.SENTRY_DSN: diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py index 282378b..10b701b 100644 --- a/fatcat_scholar/work_pipeline.py +++ b/fatcat_scholar/work_pipeline.py @@ -208,7 +208,7 @@ class WorkPipeline: return None except urllib3.exceptions.MaxRetryError: # HACK: work around broken seaweedfs keys - print(f"seaweedfs failure: sha1hex={fe.sha1}", file=sys.stderr) + print(f"seaweedfs failure: sha1hex={sha1hex}", file=sys.stderr) return None return dict( |