From fa1ad78cd0e00f524221f972889ee32373d7b94e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 7 Feb 2022 19:51:56 -0800 Subject: container: scholars portal kbart link; unknown type display --- python/fatcat_web/templates/container_view.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html index c5f68367..d88b44ab 100644 --- a/python/fatcat_web/templates/container_view.html +++ b/python/fatcat_web/templates/container_view.html @@ -76,7 +76,7 @@ {% if type_row == "_unknown" %} - Unknown + unknown-type {% else %} {{ type_row }} {% endif %} @@ -158,6 +158,8 @@ HathiTrust: {% elif k == "portico" and container.issnl %} Portico: + {% elif k == "scholarsportal" and container.issnl %} + Scholars Portal: {% else %} {{ k }}: {% endif %} -- cgit v1.2.3 From 6244c06abf8488fff87b30cb0a8433592f1f5d24 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 8 Feb 2022 14:35:39 -0800 Subject: container search: iterate on SERP page (including stats) --- python/fatcat_web/search.py | 16 +++++- python/fatcat_web/templates/container_search.html | 3 +- python/fatcat_web/templates/entity_macros.html | 63 ++++++++++++++++++++--- 3 files changed, 72 insertions(+), 10 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 8cbe09f6..2a3515d4 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -178,7 +178,7 @@ def do_container_search(query: GenericQuery, deep_page_limit: int = 2000) -> Sea search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_CONTAINER_INDEX"]) - search = search.query( + basic_query = Q( "query_string", query=query.q, default_operator="AND", @@ -188,6 +188,20 @@ def do_container_search(query: GenericQuery, deep_page_limit: int = 2000) -> Sea fields=["biblio"], ) + search = search.query( + "boosting", + positive=Q( + "bool", + must=basic_query, + should=[ + Q("range", releases_total={"gte": 500}), + Q("range", releases_total={"gte": 5000}), + ], + ), + negative=Q("term", releases_total=0), + negative_boost=0.5, + ) + # Sanity checks limit = min((int(query.limit or 25), 100)) offset = max((int(query.offset or 0), 0)) diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html index 0eeed55e..f091fc3b 100644 --- a/python/fatcat_web/templates/container_search.html +++ b/python/fatcat_web/templates/container_search.html @@ -34,9 +34,10 @@ {% if found.results %} {{ search_macros.top_results(query, found) }} +
{% for entity in found.results %} - {{ entity_macros.container_search_result_row(entity) }} + {{ entity_macros.container_search_result_row(entity, show_stats=True) }} {% endfor %} {% if found.results|length > 8 %} diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index 9b419c41..b801f3c5 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -267,23 +267,70 @@ {% endmacro %} -{% macro container_search_result_row(entity) -%} +{% macro container_search_result_row(entity, show_stats=False) -%}
-

+ {% if show_stats %} +
+ {% if entity.releases_total %} + ~{{ "{:,}".format(entity.releases_total) }} releases + {% elif entity.releases_total == 0 %} + (not indexed) + {% endif %} + {% if entity.releases_total %} + {{ preservation_bar({'bright': entity.preservation_bright, 'dark': entity.preservation_dark, 'none': entity.preservation_none, 'total': entity.releases_total}) }} + {% endif %} +
+ {% endif %} +

{{ entity['name'] }} - {% if entity.is_oa %}{% endif %}

+ {% if entity.original_name %} + {{ entity.original_name }}
+ {% endif %} {% if entity.publisher %} -
{{ entity.publisher }}
+ {{ entity.publisher }} +
{% endif %} {% if entity.issnl %} - issn:{{ entity.issnl }} + issnl:{{ entity.issnl }}   + {% endif %} + {% if entity.wikidata_qid %} + wikidata:{{ entity.wikidata_qid }}   + {% endif %} + {% if entity.dblp_prefix %} + dblp:{{ entity.dblp_prefix }}   + {% endif %} + {% if entity.ia_sim_collection %} + [archive.org]   + {% elif entity.sim_pubid %} + [archive.org]   + {% endif %} + {# too much clutter + {% if entity.country_code %} + country:{{ entity.country_code }}   + {% endif %} + {% for lang in entity.languages %} + lang:{{ lang }}   + {% endfor %} + {% if entity.any_kbart %} + [KBART]   + {% endif %} +
+ #} + {% if entity.in_doaj %} + [DOAJ]   + {% endif %} + {% if entity.in_road %} + [ROAD]   + {% endif %} + {% if entity.is_oa and not (entity.in_doaj or entity.in_road) %} + [open-access]   {% endif %} - {% if entity.container_type %} -  {{ entity.container_type }} + {% if entity.container_type and entity.container_type != "journal" %} + [{{ entity.container_type }}]   {% endif %} {% if entity.publication_status and entity.publication_status != "active" %} -  {{ entity.publication_status }} + {{ entity.publication_status }}   {% endif %}
{% endmacro %} -- cgit v1.2.3 From 6976b6868cdf0628aa79d47aab4e889a9ccfc0dc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 8 Feb 2022 19:02:34 -0800 Subject: container SERP: fix layout on mobile --- python/fatcat_web/templates/container_search.html | 7 +- python/fatcat_web/templates/entity_macros.html | 109 +++++++++++----------- 2 files changed, 62 insertions(+), 54 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html index f091fc3b..0031a0a8 100644 --- a/python/fatcat_web/templates/container_search.html +++ b/python/fatcat_web/templates/container_search.html @@ -33,7 +33,12 @@ {% if found %} {% if found.results %} - {{ search_macros.top_results(query, found) }} +
+
+ Content Status +
+ {{ search_macros.top_results(query, found) }} +

{% for entity in found.results %} diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index b801f3c5..6ad63a14 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -268,9 +268,63 @@ {% macro container_search_result_row(entity, show_stats=False) -%} -
+
+
+

+ {{ entity['name'] }} +

+ {% if entity.original_name %} + {{ entity.original_name }}
+ {% endif %} + {% if entity.publisher %} + {{ entity.publisher }} +
+ {% endif %} + {% if entity.issnl %} + issnl:{{ entity.issnl }}   + {% endif %} + {% if entity.wikidata_qid %} + wikidata:{{ entity.wikidata_qid }}   + {% endif %} + {% if entity.dblp_prefix %} + dblp:{{ entity.dblp_prefix }}   + {% endif %} + {% if entity.ia_sim_collection %} + [archive.org]   + {% elif entity.sim_pubid %} + [archive.org]   + {% endif %} + {# too much clutter + {% if entity.country_code %} + country:{{ entity.country_code }}   + {% endif %} + {% for lang in entity.languages %} + lang:{{ lang }}   + {% endfor %} + {% if entity.any_kbart %} + [KBART]   + {% endif %} +
+ #} + {% if entity.in_doaj %} + [DOAJ]   + {% endif %} + {% if entity.in_road %} + [ROAD]   + {% endif %} + {% if entity.is_oa and not (entity.in_doaj or entity.in_road) %} + [open-access]   + {% endif %} + {% if entity.container_type and entity.container_type != "journal" %} + [{{ entity.container_type }}]   + {% endif %} + {% if entity.publication_status and entity.publication_status != "active" %} + {{ entity.publication_status }}   + {% endif %} +
+ {% if show_stats %} -
+
{% if entity.releases_total %} ~{{ "{:,}".format(entity.releases_total) }} releases {% elif entity.releases_total == 0 %} @@ -281,57 +335,6 @@ {% endif %}
{% endif %} -

- {{ entity['name'] }} -

- {% if entity.original_name %} - {{ entity.original_name }}
- {% endif %} - {% if entity.publisher %} - {{ entity.publisher }} -
- {% endif %} - {% if entity.issnl %} - issnl:{{ entity.issnl }}   - {% endif %} - {% if entity.wikidata_qid %} - wikidata:{{ entity.wikidata_qid }}   - {% endif %} - {% if entity.dblp_prefix %} - dblp:{{ entity.dblp_prefix }}   - {% endif %} - {% if entity.ia_sim_collection %} - [archive.org]   - {% elif entity.sim_pubid %} - [archive.org]   - {% endif %} - {# too much clutter - {% if entity.country_code %} - country:{{ entity.country_code }}   - {% endif %} - {% for lang in entity.languages %} - lang:{{ lang }}   - {% endfor %} - {% if entity.any_kbart %} - [KBART]   - {% endif %} -
- #} - {% if entity.in_doaj %} - [DOAJ]   - {% endif %} - {% if entity.in_road %} - [ROAD]   - {% endif %} - {% if entity.is_oa and not (entity.in_doaj or entity.in_road) %} - [open-access]   - {% endif %} - {% if entity.container_type and entity.container_type != "journal" %} - [{{ entity.container_type }}]   - {% endif %} - {% if entity.publication_status and entity.publication_status != "active" %} - {{ entity.publication_status }}   - {% endif %}
{% endmacro %} -- cgit v1.2.3 From 9ab3cd54aa039393d294cdc85871353651c35576 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 9 Feb 2022 00:23:04 -0800 Subject: search: improve container_id handling --- python/fatcat_web/search.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 2a3515d4..9e9376cc 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -44,16 +44,6 @@ class ReleaseQuery: query_str = args.get("q") or "*" - container_id = args.get("container_id") - # TODO: as filter, not in query string - if container_id: - query_str += ' container_id:"{}"'.format(container_id) - - # TODO: where are container_issnl queries actually used? - issnl = args.get("container_issnl") - if issnl and query_str: - query_str += ' container_issnl:"{}"'.format(issnl) - offset = args.get("offset", "0") offset = max(0, int(offset)) if offset.isnumeric() else 0 @@ -61,7 +51,7 @@ class ReleaseQuery: q=query_str, offset=offset, fulltext_only=bool(args.get("fulltext_only")), - container_id=container_id, + container_id=args.get("container_id"), recent=bool(args.get("recent")), exclude_stubs=bool(args.get("exclude_stubs")), ) @@ -263,6 +253,9 @@ def do_release_search(query: ReleaseQuery, deep_page_limit: int = 2000) -> Searc ], ) + if query.container_id: + search = search.filter("term", container_id=query.container_id) + search = search.query( "boosting", positive=Q( @@ -657,11 +650,7 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any] "biblio", ], ) - if query.container_id: - search = search.filter( - "term", - container_id=query.container_id, - ) + search = search.filter("term", container_id=query.container_id) if query.exclude_stubs: search = search.query( "bool", @@ -923,17 +912,7 @@ def get_elastic_preservation_by_type(query: ReleaseQuery) -> List[dict]: ], ) if query.container_id: - search = search.query( - "bool", - filter=[ - Q( - "bool", - must=[ - Q("match", container_id=query.container_id), - ], - ), - ], - ) + search = search.filter("term", container_id=query.container_id) if query.recent: date_today = datetime.date.today() start_date = str(date_today - datetime.timedelta(days=60)) -- cgit v1.2.3 From 5bc77c47eed20676cd3db162c9675311f77c6cf9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 9 Feb 2022 00:24:46 -0800 Subject: web: move search-in-container to dedicated tab --- python/fatcat_web/routes.py | 46 ++++++++++++++ python/fatcat_web/templates/container_view.html | 5 +- .../templates/container_view_search.html | 70 ++++++++++++++++++++++ python/fatcat_web/templates/entity_base.html | 1 + 4 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 python/fatcat_web/templates/container_view_search.html (limited to 'python') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 186166bd..b25cd37c 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -1079,6 +1079,52 @@ def coverage_search() -> AnyResponse: ) +@app.route("/container//search", methods=["GET", "POST"]) +def container_view_search(ident: str) -> AnyResponse: + entity = generic_get_entity("container", ident) + + if entity.state == "redirect": + return redirect(f"/container/{entity.redirect}") + elif entity.state == "deleted": + return render_template("deleted_entity.html", entity_type="container", entity=entity) + + if "q" not in request.args.keys(): + return render_template( + "container_view_search.html", + query=ReleaseQuery(), + found=None, + entity_type="container", + entity=entity, + editgroup_id=None, + ) + + query = ReleaseQuery.from_args(request.args) + query.container_id = ident + try: + found = do_release_search(query) + except FatcatSearchError as fse: + return ( + render_template( + "container_view_search.html", + query=query, + es_error=fse, + entity_type="container", + entity=entity, + editgroup_id=None, + ), + fse.status_code, + ) + + return render_template( + "container_view_search.html", + query=query, + found=found, + entity_type="container", + entity=entity, + editgroup_id=None, + ) + + def get_changelog_stats() -> Dict[str, Any]: stats = {} latest_changelog = api.get_changelog(limit=1)[0] diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html index d88b44ab..abb31e06 100644 --- a/python/fatcat_web/templates/container_view.html +++ b/python/fatcat_web/templates/container_view.html @@ -24,12 +24,11 @@ {% endif %} {% if container.state == "active" %} -

Search Releases from this Container

-
+

Search Content

+
-
diff --git a/python/fatcat_web/templates/container_view_search.html b/python/fatcat_web/templates/container_view_search.html new file mode 100644 index 00000000..289c8dad --- /dev/null +++ b/python/fatcat_web/templates/container_view_search.html @@ -0,0 +1,70 @@ +{% set container = entity %} +{% set entity_view = "search" %} +{% set entity_type = "container" %} +{% import "entity_macros.html" as entity_macros %} +{% import "search_macros.html" as search_macros %} +{% extends "entity_base.html" %} + +{% block entity_main %} +
+ +
+

Search inside Container

+ +
+
+ + +
+
Can also search all releases. +
+ +
+ +
+{% if found %} + {% if found.results %} + + {{ search_macros.top_results(query, found) }} + + {% for paper in found.results %} + {{ entity_macros.release_search_result_row(paper) }} + {% endfor %} + + {% if found.results|length > 8 %} +
+
+ {{ search_macros.bottom_results(query, found, endpoint='release_search') }} +
+ {% endif %} + + {% else %} + + Raw query was: {{ query.q }} + +
+
+
+ confused paper man +
+
+

No results found!

+

You could try elsewhere:

+ +
+
+
+ + {% endif %} + +{% elif es_error %} + {{ search_macros.es_error_msg(es_error) }} +{% endif %} + +
+{% endblock %} + diff --git a/python/fatcat_web/templates/entity_base.html b/python/fatcat_web/templates/entity_base.html index c3d6096b..2782edd5 100644 --- a/python/fatcat_web/templates/entity_base.html +++ b/python/fatcat_web/templates/entity_base.html @@ -83,6 +83,7 @@ {{ entity_tab("overview", "Overview", "") }} {% if entity_type == "container" and entity.state == 'active' and not editgroup %} {{ entity_tab("coverage", "Preservation", "/coverage") }} + {{ entity_tab("search", "Search", "/search") }} {% elif entity_type == "release" and entity.state != 'deleted' %} {{ entity_tab("contribs", "Authors", "/contribs", entity._authors|count ) }} {% if entity.state == 'active' %} -- cgit v1.2.3 From d73ab1f7cc45c122f321f0e717de2067554baabb Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 9 Feb 2022 17:16:11 -0800 Subject: containers: initial work on 'browse' feature --- python/fatcat_web/routes.py | 11 +++- python/fatcat_web/search.py | 64 ++++++++++++++++++++++ .../templates/container_view_browse.html | 32 +++++++++++ python/fatcat_web/templates/entity_base.html | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 python/fatcat_web/templates/container_view_browse.html (limited to 'python') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index b25cd37c..9f46c674 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -60,6 +60,7 @@ from fatcat_web.search import ( ReleaseQuery, do_container_search, do_release_search, + get_elastic_container_browse_year_volume, get_elastic_container_histogram_legacy, get_elastic_container_preservation_by_volume, get_elastic_container_random_releases, @@ -286,6 +287,8 @@ def generic_entity_view(entity_type: str, ident: str, view_template: str) -> Any entity._type_preservation = get_elastic_preservation_by_type( ReleaseQuery(container_id=ident), ) + if view_template == "container_view_browse.html": + entity._browse_volume_year = get_elastic_container_browse_year_volume(entity.ident) return render_template( view_template, entity_type=entity_type, entity=entity, editgroup_id=None @@ -346,6 +349,12 @@ def container_view_coverage(ident: str) -> AnyResponse: return generic_entity_view("container", ident, "container_view_coverage.html") +@app.route("/container//browse", methods=["GET"]) +def container_view_browser(ident: str) -> AnyResponse: + # note: there is a special hack to add entity._type_preservation for this endpoint + return generic_entity_view("container", ident, "container_view_browse.html") + + @app.route("/container//metadata", methods=["GET"]) def container_view_metadata(ident: str) -> AnyResponse: return generic_entity_view("container", ident, "entity_view_metadata.html") @@ -1079,7 +1088,7 @@ def coverage_search() -> AnyResponse: ) -@app.route("/container//search", methods=["GET", "POST"]) +@app.route("/container//search", methods=["GET", "POST"]) def container_view_search(ident: str) -> AnyResponse: entity = generic_get_entity("container", ident) diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 9e9376cc..ac4dc34e 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -327,6 +327,70 @@ def get_elastic_container_random_releases(ident: str, limit: int = 5) -> List[Di return results +def get_elastic_container_browse_year_volume(ident: str) -> List[Dict[int, Any]]: + """ + Returns a set of histogram buckets: + + container_ident: str + years{} + volumes{} + """ + + search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) + search = search.query( + "bool", + filter=[Q("bool", must_not=[Q("match", release_type="stub")])], + ) + search = search.filter("term", container_id=ident) + search.aggs.bucket( + "year_volume", + "composite", + size=1500, + sources=[ + { + "year": { + "histogram": { + "field": "release_year", + "interval": 1, + "missing_bucket": True, + # TODO: es-public-proxy support? + # "order": "asc", + # "missing_order": "last", + }, + } + }, + { + "volume": { + "terms": { + "field": "volume", + "missing_bucket": True, + # TODO: es-public-proxy support? + # "order": "asc", + # "missing_order": "last", + }, + } + }, + ], + ) + search = search[:0] + search = search.params(request_cache=True) + resp = wrap_es_execution(search) + buckets = resp.aggregations.year_volume.buckets + # print(buckets) + buckets = [h for h in buckets if h["key"]["year"]] + year_nums = set([int(h["key"]["year"]) for h in buckets]) + year_dicts: Dict[int, Dict[str, Any]] = dict() + if year_nums: + for year in year_nums: + year_dicts[year] = {} + for row in buckets: + year_dicts[int(row["key"]["year"])][row["key"]["volume"] or "_unknown"] = int( + row["doc_count"] + ) + # return sorted(year_dicts.values(), key=lambda x: x["year"]) + return year_dicts + + def get_elastic_entity_stats() -> dict: """ TODO: files, filesets, webcaptures (no schema yet) diff --git a/python/fatcat_web/templates/container_view_browse.html b/python/fatcat_web/templates/container_view_browse.html new file mode 100644 index 00000000..b5691899 --- /dev/null +++ b/python/fatcat_web/templates/container_view_browse.html @@ -0,0 +1,32 @@ +{% set container = entity %} +{% set entity_view = "browse" %} +{% set entity_type = "container" %} +{% import "entity_macros.html" as entity_macros %} +{% extends "entity_base.html" %} + +{% block entity_main %} + +{% if entity._browse_volume_year %} +

Browse by Year and Volume

+
    +{% for year in entity._browse_volume_year.keys()|sort|reverse %} + {% for volume in entity._browse_volume_year[year].keys()|sort|reverse %} + {% if volume == '_unknown' %} +
  • {{ year }} ({{ entity._browse_volume_year[year][volume] }} releases) + {% else %} +
  • {{ year }} | Vol. {{ volume }} ({{ entity._browse_volume_year[year][volume] }} releases) + {% endif %} + {% endfor %} +{% endfor %} +
+{% elif entity._browse_issues %} +{% for issue in entity._browse_issues.keys()|sort|reverse %} +

{{ issue }}

+ {% for paper in entity._browse_issues[issue] %} + {{ paper.title }}
+ {% endfor %} +{% endfor %} +{% endif %} + +{% endblock %} + diff --git a/python/fatcat_web/templates/entity_base.html b/python/fatcat_web/templates/entity_base.html index 2782edd5..633f3aee 100644 --- a/python/fatcat_web/templates/entity_base.html +++ b/python/fatcat_web/templates/entity_base.html @@ -82,6 +82,7 @@