From 3d7d90fae8550b061c75de27bad5b9ed86bd3d92 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 15 Feb 2022 14:30:44 -0800 Subject: container browse: refactor count data structure to fix sorting --- python/fatcat_web/routes.py | 8 +-- python/fatcat_web/search.py | 64 +++++++++++++++++++--- .../templates/container_view_browse.html | 37 +++++++------ 3 files changed, 79 insertions(+), 30 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index f8182679..0f847cca 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -393,10 +393,10 @@ def container_view_browse(ident: str) -> AnyResponse: query_string = f"year:{year}" query_sort = ["release_date"] else: - entity._browse_volume_year = get_elastic_container_browse_year_volume_issue( + entity._browse_year_volume_issue = get_elastic_container_browse_year_volume_issue( entity.ident ) - print(entity._browse_volume_year) + # print(entity._browse_year_volume_issue) return render_template( "container_view_browse.html", entity_type="container", @@ -404,7 +404,7 @@ def container_view_browse(ident: str) -> AnyResponse: editgroup_id=None, ) - print(query_string) + # print(query_string) query = ReleaseQuery( q=query_string, limit=200, @@ -1093,7 +1093,7 @@ def release_search() -> AnyResponse: container_found = None filter_only_query = True for p in request.args.get("q", "").split(): - if not ":" in p: + if ":" not in p: filter_only_query = False break if request.args.get("generic") and not filter_only_query: diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index e79fcd8d..7528c3d4 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -247,9 +247,39 @@ def get_elastic_container_random_releases(ident: str, limit: int = 5) -> List[Di return results -def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict[str, Any]]: +def _sort_vol_key(val: Optional[str]) -> Tuple[bool, bool, int, str]: """ - Returns a set of histogram buckets, by year (int), volume (str), issue (str) + Helper for sorting volume and issue strings. Defined order is: + + - None values first + - any non-integers next, in non-integer order + - any integers next, in integer sorted order (ascending) + + Note that the actual sort used/displayed is reversed + """ + if val is None: + return (False, False, 0, "") + if val.isdigit(): + return (True, True, int(val), "") + else: + return (True, False, 0, val) + + +def get_elastic_container_browse_year_volume_issue(ident: str) -> List[Dict[str, Any]]: + """ + Returns a set of histogram buckets, as nested dicts/lists: + + [ + { year: int, + volumes: [ + { volume: str|None + issues: [ + { issue: str|None + count: int + } + ] } + ] } + ] """ search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) @@ -269,9 +299,6 @@ def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict "field": "release_year", "interval": 1, "missing_bucket": True, - # TODO: es-public-proxy support? - # "order": "asc", - # "missing_order": "last", }, } }, @@ -306,12 +333,31 @@ def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict year_dicts[year] = {} for row in buckets: year = int(row["key"]["year"]) - volume = row["key"]["volume"] or "000_unknown" - issue = row["key"]["issue"] or "000_unknown" - if not volume in year_dicts[year]: + volume = row["key"]["volume"] or "" + issue = row["key"]["issue"] or "" + if volume not in year_dicts[year]: year_dicts[year][volume] = {} year_dicts[year][volume][issue] = int(row["doc_count"]) - return year_dicts + + # transform to lists-of-dicts + year_list = [] + for year in year_dicts.keys(): + volume_list = [] + for volume in year_dicts[year].keys(): + issue_list = [] + for issue in year_dicts[year][volume].keys(): + issue_list.append( + dict(issue=issue or None, count=year_dicts[year][volume][issue]) + ) + issue_list = sorted( + issue_list, key=lambda x: _sort_vol_key(x["issue"]), reverse=True + ) + volume_list.append(dict(volume=volume or None, issues=issue_list)) + volume_list = sorted( + volume_list, key=lambda x: _sort_vol_key(x["volume"]), reverse=True + ) + year_list.append(dict(year=year, volumes=volume_list)) + return sorted(year_list, key=lambda x: x["year"], reverse=True) def get_elastic_entity_stats() -> dict: diff --git a/python/fatcat_web/templates/container_view_browse.html b/python/fatcat_web/templates/container_view_browse.html index a2ad251b..6ea06df8 100644 --- a/python/fatcat_web/templates/container_view_browse.html +++ b/python/fatcat_web/templates/container_view_browse.html @@ -17,36 +17,36 @@ {# NOTE: this section is pretty nested, with complex behavior; it could be hard to edit and understand #} {# TODO: these "sorts" are lexical, not numeric, which causes problems #} - {% for year in data.keys()|sort|reverse %} + {% for year in data %} {% set year_loop = loop %} - {% for volume in data[year].keys()|sort|reverse %} + {% for volume in year.volumes %} {% set volume_loop = loop %} - {% for issue in data[year][volume].keys()|sort|reverse %} + {% for issue in volume.issues %} {% set issue_loop = loop %} {% if volume_loop.first and issue_loop.first %} - {% set year_rowspan = data[year].values()|map('length')|sum %} + {% set year_rowspan = year.volumes|map(attribute='issues')|map('length')|sum %} - {{ year }} + {{ year.year }} {% endif %} {% if issue_loop.first %} - - {% if volume != '000_unknown' %} - Vol. {{ volume }} + + {% if volume.volume %} + Vol. {{ volume.volume }} {% endif %} {% endif %} - {% if issue != '000_unknown' %} - Issue {{ issue }} + {% if issue.issue %} + Issue {{ issue.issue }} {% endif %} - {{ "{:,}".format(data[year][volume][issue]) }} releases + {{ "{:,}".format(issue.count) }} releases {% endfor %} @@ -58,10 +58,13 @@ {% macro browse_releases(found) %}

- Browsing: - {% if request.args.volume %}Volume {{ request.args.volume }}  {% endif %} - {% if request.args.issue %}Issue {{ request.args.issue }}  {% endif %} - {% if request.args.year %}Year {{ request.args.year }}  {% endif %} + {% if request.args.volume %} + Volume {{ request.args.volume }} + {%- if request.args.issue %}, Issue {{ request.args.issue }}{% endif -%} + {%- if request.args.year %} ({{ request.args.year }}){% endif %} + {% else %} + Year {{ request.args.year }} + {% endif %}


{% for release_doc in found.results %} @@ -88,13 +91,13 @@ {% if releases_found %} {{ browse_releases(releases_found) }} -{% elif entity._browse_volume_year %} +{% elif entity._browse_year_volume_issue %}

Publications by Year, Volume, and Issue

This table includes content which does not have article-level metadata about volume or issue, but at least the year of publication must be known. "Stub" releases (eg, spam or duplicate DOIs) are not listed. - {{ browse_year_volume_issue_table(entity, entity._browse_volume_year) }} + {{ browse_year_volume_issue_table(entity, entity._browse_year_volume_issue) }}

{% endif %} -- cgit v1.2.3