about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-04-06 17:40:30 -0700
committer Bryan Newbold <bnewbold@archive.org> 2022-04-06 17:40:30 -0700
commit ddf54c9670c6d4adb500e08342d831cf1214d641 (patch)
tree 5f4533779c6a85e8dda318f5def1619d73d5adce
parent c06508bb7b2c28360c74ac5a98cad0b1ee9f77c7 (diff)
download fatcat-scholar-ddf54c9670c6d4adb500e08342d831cf1214d641.tar.gz
fatcat-scholar-ddf54c9670c6d4adb500e08342d831cf1214d641.zip
polish RSS feed generation, and add basic test coverage
-rw-r--r-- fatcat_scholar/templates/base.html 20
-rw-r--r-- fatcat_scholar/web.py 122
-rw-r--r-- tests/test_web.py 22
3 files changed, 91 insertions, 73 deletions
diff --git a/fatcat_scholar/templates/base.html b/fatcat_scholar/templates/base.html
index ce74f26..1abc216 100644
--- a/fatcat_scholar/templates/base.html
+++ b/fatcat_scholar/templates/base.html
@@ -136,19 +136,13 @@
<div style="display: flex; width: 100%; justify-content: space-between; padding-top: 0.5em;">
<div></div>
<div>
- {% if query %}
- <form class="" id="rss_feed_form" name="rss_feed_form" action="{{ lang_prefix }}/feed/rss" method="get">
- <input form="rss_feed_form" type="hidden" name="q" value="{{ query.q }}">
- {% if query.filter_type %}
- <input form="rss_feed_form" type="hidden" name="filter_type" value="{{ query.filter_type }}">
- {% endif %}
- {% if query.filter_availability %}
- <input form="rss_feed_form" type="hidden" name="filter_availability" value="{{ query.filter_availability }}">
- {% endif %}
- <button class="text-button" form="rss_feed_form" type="submit">
- {% trans %}RSS Feed{% endtrans %}
- </button>
- </form>
+ {% if query and query.q %}
+ {% set rss_params = dict(q=query.q) %}
+ {% if query.filter_type %}{% do rss_params.update(dict(filter_type= query.filter_type)) %}{% endif %}
+ {% if query.filter_availability %}{% do rss_params.update(dict(filter_availabilty=query.filter_availability)) %}{% endif %}
+ <a href="{{ lang_prefix }}/feed/rss?{{ rss_params | urlencode }}" rel="nofollow">
+ {% trans %}RSS Feed{% endtrans %}
+ </a>
&nbsp;
{% endif %}
<a rel="help" href="{{ lang_prefix }}/help" style="order: -1;">{{ _("User Guide") }}</a>
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 0f2835e..18299ee 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -178,65 +178,6 @@ def search(query: FulltextQuery = Depends(FulltextQuery)) -> FulltextHits:
return hits
-@api.get("/feed/rss", operation_id="get_feed_rss", include_in_schema=False)
-def feed_rss(
- query: FulltextQuery = Depends(FulltextQuery),
- lang: LangPrefix = Depends(LangPrefix),
-) -> fastapi_rss.RSSResponse:
-
- # override some query params for feeds
- original_query = query.q
- if query.q:
- query.q += " doc_type:work"
- query.offset = None
- query.filter_time = "past_year"
- query.sort_order = "time_desc"
- query.limit = 20
-
- hits: FulltextHits = process_query(query)
-
- rss_items = []
- for hit in hits.results:
- scholar_doc = hit["_obj"]
- abstract: Optional[str] = None
- if scholar_doc.abstracts:
- abstract = scholar_doc.abstracts[0].body
- authors = ", ".join(scholar_doc.biblio.contrib_names) or None
- pub_date = None
- if scholar_doc.biblio.release_date:
- # convert datetime.date to datetime.datetime
- pub_date = datetime.datetime(
- *scholar_doc.biblio.release_date.timetuple()[:6]
- )
- rss_items.append(
- fastapi_rss.Item(
- title=scholar_doc.biblio.title,
- link=f"https://scholar.archive.org/work/{scholar_doc.work_ident}",
- description=abstract,
- author=authors,
- pub_date=pub_date,
- guid=fastapi_rss.GUID(content=scholar_doc.key),
- )
- )
-
- last_build_date = None
- if rss_items:
- last_build_date = rss_items[0].pub_date
- feed = fastapi_rss.RSSFeed(
- title=f"IA Scholar Query: {original_query}",
- link="https://scholar.archive.org/",
- description="Internet Archive Scholar query results feed",
- language="en",
- last_build_date=last_build_date,
- docs="https://scholar.archive.org/help",
- generator="fatcat-scholar",
- webmaster="info@archive.org",
- ttl=60 * 24, # 24 hours, in minutes
- item=rss_items,
- )
- return fastapi_rss.RSSResponse(feed)
-
-
@api.get("/work/{work_ident}", operation_id="get_work")
def get_work(work_ident: str = Query(..., min_length=20, max_length=20)) -> dict:
doc = get_es_scholar_doc(f"work_{work_ident}")
@@ -284,7 +225,7 @@ def load_i18n_templates() -> Any:
)
templates = Jinja2Templates(
directory="fatcat_scholar/templates",
- extensions=["jinja2.ext.i18n"],
+ extensions=["jinja2.ext.i18n", "jinja2.ext.do"],
)
templates.env.install_gettext_translations(translations, newstyle=True) # type: ignore
templates.env.install_gettext_callables( # type: ignore
@@ -387,6 +328,67 @@ def web_search(
)
+@web.get("/feed/rss", operation_id="get_feed_rss", include_in_schema=False)
+def web_feed_rss(
+ query: FulltextQuery = Depends(FulltextQuery),
+ lang: LangPrefix = Depends(LangPrefix),
+) -> fastapi_rss.RSSResponse:
+
+ # override some query params for feeds
+ original_query = query.q
+ if query.q:
+ query.q += " doc_type:work"
+ query.offset = None
+ query.filter_time = "past_year"
+ query.sort_order = "time_desc"
+ query.limit = 20
+
+ hits: FulltextHits = process_query(query)
+
+ rss_items = []
+ for hit in hits.results:
+ scholar_doc = hit["_obj"]
+ abstract: Optional[str] = None
+ if scholar_doc.abstracts:
+ abstract = scholar_doc.abstracts[0].body
+ authors = ", ".join(scholar_doc.biblio.contrib_names) or None
+ pub_date = None
+ if scholar_doc.biblio.release_date:
+ # convert datetime.date to datetime.datetime
+ pub_date = datetime.datetime(
+ *scholar_doc.biblio.release_date.timetuple()[:6]
+ )
+ rss_items.append(
+ # NOTE(i18n): could prefer "original title" and abstract based on lang context
+ fastapi_rss.Item(
+ title=scholar_doc.biblio.title or f"(Microfilm Page)",
+ link=f"https://scholar.archive.org{lang.prefix}/work/{scholar_doc.work_ident}",
+ description=abstract,
+ author=authors,
+ pub_date=pub_date,
+ guid=fastapi_rss.GUID(content=scholar_doc.key),
+ )
+ )
+
+ last_build_date = None
+ if rss_items:
+ last_build_date = rss_items[0].pub_date
+ # i18n: unsure how to swap in translated strings here (in code, not in jinja2 template)
+ feed = fastapi_rss.RSSFeed(
+ title=f"IA Scholar Query: {original_query}",
+ link=f"https://scholar.archive.org{lang.prefix}/",
+ description="Internet Archive Scholar query results feed",
+ language="en",
+ last_build_date=last_build_date,
+ docs=f"https://scholar.archive.org{lang.prefix}/help",
+ generator="fatcat-scholar",
+ webmaster="info@archive.org",
+ ttl=60 * 24, # 24 hours, in minutes
+ item=rss_items,
+ )
+ return fastapi_rss.RSSResponse(feed)
+
+
@web.get("/work/{work_ident}", include_in_schema=False)
def web_work(
request: Request,
diff --git a/tests/test_web.py b/tests/test_web.py
index 3ffab87..31f7982 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -104,6 +104,28 @@ def test_basic_search(client: Any, mocker: Any) -> None:
assert rv.status_code == 200
+def test_basic_rss_feed(client: Any, mocker: Any) -> None:
+
+ with open("tests/files/elastic_fulltext_search.json") as f:
+ elastic_resp = json.loads(f.read())
+
+ es_raw = mocker.patch(
+ "elasticsearch.connection.Urllib3HttpConnection.perform_request"
+ )
+ es_raw.side_effect = [
+ (200, {}, json.dumps(elastic_resp)),
+ (200, {}, json.dumps(elastic_resp)),
+ ]
+
+ rv = client.get("/feed/rss?q=blood")
+ assert rv.status_code == 200
+ assert rv.content.startswith(b"<rss")
+
+ rv = client.get("/zh/feed/rss?q=blood")
+ assert rv.status_code == 200
+ assert rv.content.startswith(b"<rss")
+
+
def test_basic_work_landing_page(client: Any, mocker: Any) -> None:
with open("tests/files/elastic_fulltext_get.json") as f: