diff options
field | value | date |
---|---|---|
author | Bryan Newbold <bnewbold@robocracy.org> | 2022-02-15 14:30:44 -0800 |
committer | Bryan Newbold <bnewbold@robocracy.org> | 2022-02-15 14:30:44 -0800 |
commit | 3d7d90fae8550b061c75de27bad5b9ed86bd3d92 (patch) | |
tree | d17eccad3d5c0c1bc918ebeb46d63edf7813f458 /python/fatcat_web | |
parent | 063be7d54099f4899ffa66f421b1a0e107646b3c (diff) | |
download | fatcat-3d7d90fae8550b061c75de27bad5b9ed86bd3d92.tar.gz fatcat-3d7d90fae8550b061c75de27bad5b9ed86bd3d92.zip | |
container browse: refactor count data structure to fix sorting
Diffstat (limited to 'python/fatcat_web')
-rw-r--r-- | python/fatcat_web/routes.py | 8 |
-rw-r--r-- | python/fatcat_web/search.py | 64 |
-rw-r--r-- | python/fatcat_web/templates/container_view_browse.html | 37 |
3 files changed, 79 insertions, 30 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index f8182679..0f847cca 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -393,10 +393,10 @@ def container_view_browse(ident: str) -> AnyResponse: query_string = f"year:{year}" query_sort = ["release_date"] else: - entity._browse_volume_year = get_elastic_container_browse_year_volume_issue( + entity._browse_year_volume_issue = get_elastic_container_browse_year_volume_issue( entity.ident ) - print(entity._browse_volume_year) + # print(entity._browse_year_volume_issue) return render_template( "container_view_browse.html", entity_type="container", @@ -404,7 +404,7 @@ def container_view_browse(ident: str) -> AnyResponse: editgroup_id=None, ) - print(query_string) + # print(query_string) query = ReleaseQuery( q=query_string, limit=200, @@ -1093,7 +1093,7 @@ def release_search() -> AnyResponse: container_found = None filter_only_query = True for p in request.args.get("q", "").split(): - if not ":" in p: + if ":" not in p: filter_only_query = False break if request.args.get("generic") and not filter_only_query: diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index e79fcd8d..7528c3d4 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -247,9 +247,39 @@ def get_elastic_container_random_releases(ident: str, limit: int = 5) -> List[Di return results -def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict[str, Any]]: +def _sort_vol_key(val: Optional[str]) -> Tuple[bool, bool, int, str]: """ - Returns a set of histogram buckets, by year (int), volume (str), issue (str) + Helper for sorting volume and issue strings. 
Defined order is: + + - None values first + - any non-integers next, in non-integer order + - any integers next, in integer sorted order (ascending) + + Note that the actual sort used/displayed is reversed + """ + if val is None: + return (False, False, 0, "") + if val.isdigit(): + return (True, True, int(val), "") + else: + return (True, False, 0, val) + + +def get_elastic_container_browse_year_volume_issue(ident: str) -> List[Dict[str, Any]]: + """ + Returns a set of histogram buckets, as nested dicts/lists: + + [ + { year: int, + volumes: [ + { volume: str|None + issues: [ + { issue: str|None + count: int + } + ] } + ] } + ] """ search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) @@ -269,9 +299,6 @@ def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict "field": "release_year", "interval": 1, "missing_bucket": True, - # TODO: es-public-proxy support? - # "order": "asc", - # "missing_order": "last", }, } }, @@ -306,12 +333,31 @@ def get_elastic_container_browse_year_volume_issue(ident: str) -> Dict[int, Dict year_dicts[year] = {} for row in buckets: year = int(row["key"]["year"]) - volume = row["key"]["volume"] or "000_unknown" - issue = row["key"]["issue"] or "000_unknown" - if not volume in year_dicts[year]: + volume = row["key"]["volume"] or "" + issue = row["key"]["issue"] or "" + if volume not in year_dicts[year]: year_dicts[year][volume] = {} year_dicts[year][volume][issue] = int(row["doc_count"]) - return year_dicts + + # transform to lists-of-dicts + year_list = [] + for year in year_dicts.keys(): + volume_list = [] + for volume in year_dicts[year].keys(): + issue_list = [] + for issue in year_dicts[year][volume].keys(): + issue_list.append( + dict(issue=issue or None, count=year_dicts[year][volume][issue]) + ) + issue_list = sorted( + issue_list, key=lambda x: _sort_vol_key(x["issue"]), reverse=True + ) + volume_list.append(dict(volume=volume or None, issues=issue_list)) + volume_list = sorted( 
+ volume_list, key=lambda x: _sort_vol_key(x["volume"]), reverse=True + ) + year_list.append(dict(year=year, volumes=volume_list)) + return sorted(year_list, key=lambda x: x["year"], reverse=True) def get_elastic_entity_stats() -> dict: diff --git a/python/fatcat_web/templates/container_view_browse.html b/python/fatcat_web/templates/container_view_browse.html index a2ad251b..6ea06df8 100644 --- a/python/fatcat_web/templates/container_view_browse.html +++ b/python/fatcat_web/templates/container_view_browse.html @@ -17,36 +17,36 @@ <tbody> {# NOTE: this section is pretty nested, with complex behavior; it could be hard to edit and understand #} {# TODO: these "sorts" are lexical, not numeric, which causes problems #} - {% for year in data.keys()|sort|reverse %} + {% for year in data %} {% set year_loop = loop %} - {% for volume in data[year].keys()|sort|reverse %} + {% for volume in year.volumes %} {% set volume_loop = loop %} - {% for issue in data[year][volume].keys()|sort|reverse %} + {% for issue in volume.issues %} {% set issue_loop = loop %} <tr> {% if volume_loop.first and issue_loop.first %} - {% set year_rowspan = data[year].values()|map('length')|sum %} + {% set year_rowspan = year.volumes|map(attribute='issues')|map('length')|sum %} <td rowspan="{{ year_rowspan }}" class="top aligned"> - <a href="/container/{{ entity.ident }}/browse?year={{ year }}">{{ year }}</a> + <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}">{{ year.year }}</a> </td> {% endif %} {% if issue_loop.first %} - <td rowspan="{{ data[year][volume]|length }}" class="top aligned"> - {% if volume != '000_unknown' %} - <a href="/container/{{ entity.ident }}/browse?volume={{ volume }}">Vol. {{ volume }}</a> + <td rowspan="{{ volume.issues|length }}" class="top aligned"> + {% if volume.volume %} + <a href="/container/{{ entity.ident }}/browse?volume={{ volume.volume }}">Vol. 
{{ volume.volume }}</a> {% endif %} </td> {% endif %} <td> - {% if issue != '000_unknown' %} - <a href="/container/{{ entity.ident }}/browse?year={{ year }}{% if volume != '000_unknown' %}&volume={{ volume }}{% endif %}&issue={{ issue }}">Issue {{ issue }}</a> + {% if issue.issue %} + <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}{% if volume.volume %}&volume={{ volume.volume }}{% endif %}&issue={{ issue.issue }}">Issue {{ issue.issue }}</a> {% endif %} </td> <td class="right aligned"> - <a href="/container/{{ entity.ident }}/browse?year={{ year }}{% if volume != '000_unknown' %}&volume={{ volume }}{% endif %}{% if issue != '000_unknown' %}&issue={{ issue }}{% endif %}">{{ "{:,}".format(data[year][volume][issue]) }} releases</a> + <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}{% if volume.volume %}&volume={{ volume.volume }}{% endif %}{% if issue.issue %}&issue={{ issue.issue }}{% endif %}">{{ "{:,}".format(issue.count) }} releases</a> </td> </tr> {% endfor %} @@ -58,10 +58,13 @@ {% macro browse_releases(found) %} <h2> - Browsing: - {% if request.args.volume %}Volume {{ request.args.volume }} {% endif %} - {% if request.args.issue %}Issue {{ request.args.issue }} {% endif %} - {% if request.args.year %}Year {{ request.args.year }} {% endif %} + {% if request.args.volume %} + Volume {{ request.args.volume }} + {%- if request.args.issue %}, Issue {{ request.args.issue }}{% endif -%} + {%- if request.args.year %} ({{ request.args.year }}){% endif %} + {% else %} + Year {{ request.args.year }} + {% endif %} </h2> <br> {% for release_doc in found.results %} @@ -88,13 +91,13 @@ {% if releases_found %} {{ browse_releases(releases_found) }} -{% elif entity._browse_volume_year %} +{% elif entity._browse_year_volume_issue %} <div class="ui container text"> <h3>Publications by Year, Volume, and Issue</h3> <p>This table includes content which does not have article-level metadata about volume or issue, but at least the year of publication 
must be known. "Stub" releases (eg, spam or duplicate DOIs) are not listed. - {{ browse_year_volume_issue_table(entity, entity._browse_volume_year) }} + {{ browse_year_volume_issue_table(entity, entity._browse_year_volume_issue) }} </div> {% endif %} |