diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-03-25 18:34:28 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-03-25 18:34:30 -0700 |
commit | c3624eaad8d5f79c52f192e7978dfa4fe69044f6 (patch) | |
tree | dfc7718c273e009afcc26a41177e5196b1b98bb3 | |
parent | 9292e1ac19aba56f1b49fc83515b1fb61ce485c7 (diff) | |
download | fatcat-scholar-c3624eaad8d5f79c52f192e7978dfa4fe69044f6.tar.gz fatcat-scholar-c3624eaad8d5f79c52f192e7978dfa4fe69044f6.zip |
initial implementation of RSS search feeds
UI integration needs work
-rw-r--r-- | fatcat_scholar/templates/base.html | 15 | ||||
-rw-r--r-- | fatcat_scholar/web.py | 59 | ||||
-rw-r--r-- | proposals/2022-03-25_search_rss_feed.md | 8 |
3 files changed, 82 insertions, 0 deletions
diff --git a/fatcat_scholar/templates/base.html b/fatcat_scholar/templates/base.html index 093d4e4..ce74f26 100644 --- a/fatcat_scholar/templates/base.html +++ b/fatcat_scholar/templates/base.html @@ -136,6 +136,21 @@ <div style="display: flex; width: 100%; justify-content: space-between; padding-top: 0.5em;"> <div></div> <div> + {% if query %} + <form class="" id="rss_feed_form" name="rss_feed_form" action="{{ lang_prefix }}/feed/rss" method="get"> + <input form="rss_feed_form" type="hidden" name="q" value="{{ query.q }}"> + {% if query.filter_type %} + <input form="rss_feed_form" type="hidden" name="filter_type" value="{{ query.filter_type }}"> + {% endif %} + {% if query.filter_availability %} + <input form="rss_feed_form" type="hidden" name="filter_availability" value="{{ query.filter_availability }}"> + {% endif %} + <button class="text-button" form="rss_feed_form" type="submit"> + {% trans %}RSS Feed{% endtrans %} + </button> + </form> + + {% endif %} <a rel="help" href="{{ lang_prefix }}/help" style="order: -1;">{{ _("User Guide") }}</a> </div> </div> diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index 83f80ad..ef7a5e1 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -4,12 +4,14 @@ This contains the FastAPI web application and RESTful API. So far there are few endpoints, so we just put them all here! """ +import datetime import logging import urllib.parse from typing import Any, Dict, List, Optional import babel.numbers import babel.support +import fastapi_rss import fatcat_openapi_client import sentry_sdk from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, Response @@ -176,6 +178,63 @@ def search(query: FulltextQuery = Depends(FulltextQuery)) -> FulltextHits: return hits +@api.get("/feed/rss", operation_id="get_feed_rss", include_in_schema=False) +def feed_rss( + query: FulltextQuery = Depends(FulltextQuery), + lang: LangPrefix = Depends(LangPrefix), +) -> fastapi_rss.RSSResponse: + + # override some query params for feeds + if query.q: + query.q += " doc_type:work" + query.offset = None + query.filter_time = "past_year" + query.sort_order = "time_desc" + + hits: FulltextHits = process_query(query) + + rss_items = [] + for hit in hits.results: + scholar_doc = hit["_obj"] + abstract: Optional[str] = None + if scholar_doc.abstracts: + abstract = scholar_doc.abstracts[0].body + authors = ", ".join(scholar_doc.biblio.contrib_names) or None + pub_date = None + if scholar_doc.biblio.release_date: + # convert datetime.date to datetime.datetime + pub_date = datetime.datetime( + *scholar_doc.biblio.release_date.timetuple()[:6] + ) + rss_items.append( + fastapi_rss.Item( + title=scholar_doc.biblio.title, + link=f"https://scholar.archive.org/work/{scholar_doc.work_ident}", + description=abstract, + author=authors, + pub_date=pub_date, + guid=fastapi_rss.GUID(content=scholar_doc.key), + ) + ) + + last_build_date = None + if rss_items: + last_build_date = rss_items[0].pub_date + feed = fastapi_rss.RSSFeed( + title=f"IA Scholar: {query.q}", + link="https://scholar.archive.org/", + description="Internet Archive Scholar query results feed", + language="en", + last_build_date=last_build_date, + docs="https://scholar.archive.org/help", + generator="fatcat-scholar", + webmaster="info@archive.org", + ttl=60 * 24, # 24 hours, in minutes + item=rss_items, + ) + return fastapi_rss.RSSResponse(feed) + + @api.get("/work/{work_ident}", operation_id="get_work") def get_work(work_ident: str = Query(..., min_length=20, max_length=20)) -> dict: doc = get_es_scholar_doc(f"work_{work_ident}") diff --git a/proposals/2022-03-25_search_rss_feed.md b/proposals/2022-03-25_search_rss_feed.md new file mode 100644 index 0000000..88eb85b --- /dev/null +++ b/proposals/2022-03-25_search_rss_feed.md @@ -0,0 +1,8 @@ + +status: brainstorm + +Idea is to have a way to generate a "feed" for any search query, as RSS, which +can be used to keep up with recent publications. + +This idea was originally discussed in 2021 here: <https://github.com/internetarchive/fatcat-scholar/issues/37> + |