From c3624eaad8d5f79c52f192e7978dfa4fe69044f6 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@archive.org>
Date: Fri, 25 Mar 2022 18:34:28 -0700
Subject: initial implementation of RSS search feeds

UI integration needs work
---
 fatcat_scholar/web.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'fatcat_scholar/web.py')

diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 83f80ad..ef7a5e1 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -4,12 +4,14 @@ This contains the FastAPI web application and RESTful API.
 So far there are few endpoints, so we just put them all here!
 """
 
+import datetime
 import logging
 import urllib.parse
 from typing import Any, Dict, List, Optional
 
 import babel.numbers
 import babel.support
+import fastapi_rss
 import fatcat_openapi_client
 import sentry_sdk
 from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, Response
@@ -176,6 +178,63 @@ def search(query: FulltextQuery = Depends(FulltextQuery)) -> FulltextHits:
     return hits
 
 
+@api.get("/feed/rss", operation_id="get_feed_rss", include_in_schema=False)
+def feed_rss(
+    query: FulltextQuery = Depends(FulltextQuery),
+    lang: LangPrefix = Depends(LangPrefix),
+) -> fastapi_rss.RSSResponse:
+    """Return the search hits for `query` as an RSS feed.
+
+    Forces offset (None), time filter ("past_year") and sort ("time_desc").
+    `lang` is accepted but not read; the feed language is always "en".
+    """
+    # keep the caller's text for the feed title, before the filter is appended
+    original_query = query.q
+    # override some query params for feeds
+    if query.q:
+        query.q += " doc_type:work"
+    query.offset = None
+    query.filter_time = "past_year"
+    query.sort_order = "time_desc"
+
+    hits: FulltextHits = process_query(query)
+
+    rss_items = []
+    for hit in hits.results:
+        scholar_doc = hit["_obj"]
+        biblio = scholar_doc.biblio
+        abstracts = scholar_doc.abstracts
+        pub_date = None
+        if biblio.release_date:
+            # convert datetime.date to datetime.datetime
+            pub_date = datetime.datetime(*biblio.release_date.timetuple()[:6])
+        rss_items.append(
+            fastapi_rss.Item(
+                title=biblio.title,
+                link=f"https://scholar.archive.org/work/{scholar_doc.work_ident}",
+                description=abstracts[0].body if abstracts else None,
+                author=", ".join(biblio.contrib_names) or None,
+                pub_date=pub_date,
+                guid=fastapi_rss.GUID(content=scholar_doc.key),
+            )
+        )
+
+    feed = fastapi_rss.RSSFeed(
+        title=f"IA Scholar: {original_query}",
+        link="https://scholar.archive.org/",
+        description="Internet Archive Scholar query results feed",
+        language="en",
+        # sort_order is "time_desc", so the first item is taken as the newest
+        last_build_date=rss_items[0].pub_date if rss_items else None,
+        docs="https://scholar.archive.org/help",
+        generator="fatcat-scholar",
+        webmaster="info@archive.org",
+        ttl=60 * 24,  # 24 hours, in minutes
+        item=rss_items,
+    )
+    return fastapi_rss.RSSResponse(feed)
+
+
 @api.get("/work/{work_ident}", operation_id="get_work")
 def get_work(work_ident: str = Query(..., min_length=20, max_length=20)) -> dict:
     doc = get_es_scholar_doc(f"work_{work_ident}")
-- 
cgit v1.2.3