diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2022-03-25 18:34:28 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2022-03-25 18:34:30 -0700 | 
| commit | c3624eaad8d5f79c52f192e7978dfa4fe69044f6 (patch) | |
| tree | dfc7718c273e009afcc26a41177e5196b1b98bb3 | |
| parent | 9292e1ac19aba56f1b49fc83515b1fb61ce485c7 (diff) | |
| download | fatcat-scholar-c3624eaad8d5f79c52f192e7978dfa4fe69044f6.tar.gz fatcat-scholar-c3624eaad8d5f79c52f192e7978dfa4fe69044f6.zip  | |
initial implementation of RSS search feeds
UI integration needs work
| -rw-r--r-- | fatcat_scholar/templates/base.html | 15 | ||||
| -rw-r--r-- | fatcat_scholar/web.py | 59 | ||||
| -rw-r--r-- | proposals/2022-03-25_search_rss_feed.md | 8 | 
3 files changed, 82 insertions, 0 deletions
diff --git a/fatcat_scholar/templates/base.html b/fatcat_scholar/templates/base.html index 093d4e4..ce74f26 100644 --- a/fatcat_scholar/templates/base.html +++ b/fatcat_scholar/templates/base.html @@ -136,6 +136,21 @@          <div style="display: flex; width: 100%; justify-content: space-between; padding-top: 0.5em;">            <div></div>            <div> +            {% if query %} +              <form class="" id="rss_feed_form" name="rss_feed_form" action="{{ lang_prefix }}/feed/rss" method="get"> +                <input form="rss_feed_form" type="hidden" name="q" value="{{ query.q }}"> +                {% if query.filter_type %} +                  <input form="rss_feed_form" type="hidden" name="filter_type" value="{{ query.filter_type }}"> +                {% endif %} +                {% if query.filter_availability %} +                  <input form="rss_feed_form" type="hidden" name="filter_availability" value="{{ query.filter_availability }}"> +                {% endif %} +                <button class="text-button" form="rss_feed_form" type="submit"> +                  {% trans %}RSS Feed{% endtrans %} +                </button> +              </form> +                +            {% endif %}              <a rel="help" href="{{ lang_prefix }}/help" style="order: -1;">{{ _("User Guide") }}</a>            </div>          </div> diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index 83f80ad..ef7a5e1 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -4,12 +4,14 @@ This contains the FastAPI web application and RESTful API.  So far there are few endpoints, so we just put them all here!  """ +import datetime  import logging  import urllib.parse  from typing import Any, Dict, List, Optional  import babel.numbers  import babel.support +import fastapi_rss  import fatcat_openapi_client  import sentry_sdk  from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, Response @@ -176,6 +178,63 @@ def search(query: FulltextQuery = Depends(FulltextQuery)) -> FulltextHits:      return hits +@api.get("/feed/rss", operation_id="get_feed_rss", include_in_schema=False) +def feed_rss( +    query: FulltextQuery = Depends(FulltextQuery), +    lang: LangPrefix = Depends(LangPrefix), +) -> fastapi_rss.RSSResponse: + +    # override some query params for feeds +    if query.q: +        query.q += " doc_type:work" +    query.offset = None +    query.filter_time = "past_year" +    query.sort_order = "time_desc" + +    hits: FulltextHits = process_query(query) + +    rss_items = [] +    for hit in hits.results: +        scholar_doc = hit["_obj"] +        abstract: Optional[str] = None +        if scholar_doc.abstracts: +            abstract = scholar_doc.abstracts[0].body +        authors = ", ".join(scholar_doc.biblio.contrib_names) or None +        pub_date = None +        if scholar_doc.biblio.release_date: +            # convert datetime.date to datetime.datetime +            pub_date = datetime.datetime( +                *scholar_doc.biblio.release_date.timetuple()[:6] +            ) +        rss_items.append( +            fastapi_rss.Item( +                title=scholar_doc.biblio.title, +                link=f"https://scholar.archive.org/work/{scholar_doc.work_ident}", +                description=abstract, +                author=authors, +                pub_date=pub_date, +                guid=fastapi_rss.GUID(content=scholar_doc.key), +            ) +        ) + +    last_build_date = None +    if rss_items: +        last_build_date = rss_items[0].pub_date +    feed = fastapi_rss.RSSFeed( +        title=f"IA Scholar: {query.q}", +        link="https://scholar.archive.org/", +        description="Internet Archive Scholar query results feed", +        language="en", +        last_build_date=last_build_date, +        docs="https://scholar.archive.org/help", +        generator="fatcat-scholar", +        webmaster="info@archive.org", +        ttl=60 * 24,  # 24 hours, in minutes +        item=rss_items, +    ) +    return fastapi_rss.RSSResponse(feed) + +  @api.get("/work/{work_ident}", operation_id="get_work")  def get_work(work_ident: str = Query(..., min_length=20, max_length=20)) -> dict:      doc = get_es_scholar_doc(f"work_{work_ident}") diff --git a/proposals/2022-03-25_search_rss_feed.md b/proposals/2022-03-25_search_rss_feed.md new file mode 100644 index 0000000..88eb85b --- /dev/null +++ b/proposals/2022-03-25_search_rss_feed.md @@ -0,0 +1,8 @@ + +status: brainstorm + +Idea is to have a way to generate a "feed" for any search query, as RSS, which +can be used to keep up with recent publications. + +This idea was originally discussed in 2021 here: <https://github.com/internetarchive/fatcat-scholar/issues/37> +  | 
