From e35e99bceff3277afaac8f2d5519aa4f07aabe49 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 23 Apr 2021 19:01:22 -0700 Subject: web: initial implementation of work landing page and citation_pdf_url access redirect The initial intent is to have something that can be used by indexing services to pull the citation_pdf_url meta tag and bounce to a direct IA PDF access URL. For now the landing page stubs are just formatted as SERP results. Presumably these will get re-styled at some point and include citation graph links, etc. --- fatcat_scholar/search.py | 38 +++++++++++- fatcat_scholar/templates/search_macros.html | 2 +- fatcat_scholar/templates/work.html | 48 +++++++++++++++ fatcat_scholar/web.py | 92 ++++++++++++++++++++++++++++- 4 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 fatcat_scholar/templates/work.html diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py index 5571909..c5fca35 100644 --- a/fatcat_scholar/search.py +++ b/fatcat_scholar/search.py @@ -21,7 +21,7 @@ from pydantic import BaseModel from fatcat_scholar.config import settings from fatcat_scholar.identifiers import * -from fatcat_scholar.schema import ScholarDoc +from fatcat_scholar.schema import ScholarDoc, ScholarFulltext from fatcat_scholar.query_parse import sniff_citation_query, pre_parse_query from fatcat_scholar.query_citation import try_fuzzy_match @@ -444,3 +444,39 @@ def es_scholar_index_alive() -> bool: return bool(resp["_shards"]["successful"] == resp["_shards"]["total"]) except KeyError: return False + + +def get_es_scholar_doc(key: str) -> Optional[dict]: + """ + Fetch a single document from search index, by key. Returns None if not found. 
+ """ + try: + resp = es_client.get(settings.ELASTICSEARCH_QUERY_FULLTEXT_INDEX, key) + except elasticsearch.exceptions.NotFoundError: + return None + doc = resp["_source"] + try: + doc["_obj"] = ScholarDoc.parse_obj(doc) + except Exception: + pass + return doc + + +def lookup_fulltext_pdf(sha1: str) -> Optional[dict]: + """ + Fetch a document by fulltext file sha1, returning only the 'fulltext' sub-document. + """ + sha1 = sha1.lower() + assert len(sha1) == 40 and sha1.isalnum() + hits = do_lookup_query( + f'fulltext.file_sha1:{sha1} fulltext.file_mimetype:"application/pdf"' + ) + if not hits.results: + return None + fulltext = ScholarFulltext.parse_obj(hits.results[0]["fulltext"]) + if not fulltext.access_type in ("ia_file", "wayback"): + return None + assert fulltext.file_sha1 == sha1 + assert fulltext.file_mimetype == "application/pdf" + assert fulltext.access_url + return fulltext diff --git a/fatcat_scholar/templates/search_macros.html b/fatcat_scholar/templates/search_macros.html index 9524d7e..63c988d 100644 --- a/fatcat_scholar/templates/search_macros.html +++ b/fatcat_scholar/templates/search_macros.html @@ -435,7 +435,7 @@ {{ platform_access_button(paper.biblio) }} {# ### COLLAPSED HITS #} - {% if paper._collapsed_count > 0 %} + {% if paper._collapsed_count and paper._collapsed_count > 0 %}