aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorbnewbold <bnewbold@archive.org>2022-03-10 00:08:11 +0000
committerbnewbold <bnewbold@archive.org>2022-03-10 00:08:11 +0000
commite4cbe43692a9c26911ea54ee88d7df0980e1d9fe (patch)
tree66090d73714ed043bf6eec608aebe136704320c7 /python
parent72e3825893ae614fcd6c6ae8a513745bfefe36b2 (diff)
parentf9b69d0b4343403ecf9318dc6d66725f6144edad (diff)
downloadfatcat-e4cbe43692a9c26911ea54ee88d7df0980e1d9fe.tar.gz
fatcat-e4cbe43692a9c26911ea54ee88d7df0980e1d9fe.zip
Merge branch 'bnewbold-container-web' into 'master'
container web interface improvements. See merge request webgroup/fatcat!140
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_tools/search/common.py96
-rw-r--r--python/fatcat_tools/search/stats.py87
-rw-r--r--python/fatcat_tools/workers/elasticsearch.py4
-rwxr-xr-xpython/fatcat_transform.py4
-rw-r--r--python/fatcat_web/routes.py171
-rw-r--r--python/fatcat_web/search.py343
-rw-r--r--python/fatcat_web/templates/container_search.html10
-rw-r--r--python/fatcat_web/templates/container_view.html137
-rw-r--r--python/fatcat_web/templates/container_view_browse.html124
-rw-r--r--python/fatcat_web/templates/container_view_coverage.html134
-rw-r--r--python/fatcat_web/templates/container_view_search.html70
-rw-r--r--python/fatcat_web/templates/entity_base.html6
-rw-r--r--python/fatcat_web/templates/entity_macros.html108
-rw-r--r--python/fatcat_web/templates/entity_view_metadata.html10
14 files changed, 965 insertions, 339 deletions
diff --git a/python/fatcat_tools/search/common.py b/python/fatcat_tools/search/common.py
new file mode 100644
index 00000000..584757fd
--- /dev/null
+++ b/python/fatcat_tools/search/common.py
@@ -0,0 +1,96 @@
+import sys
+from typing import Any, Dict, List, Union
+
+import elasticsearch
+import elasticsearch_dsl.response
+from elasticsearch_dsl import Search
+
+
+class FatcatSearchError(Exception):
+ def __init__(self, status_code: Union[int, str], name: str, description: str = None):
+ if status_code == "TIMEOUT":
+ status_code = 504
+ elif isinstance(status_code, str):
+ try:
+ status_code = int(status_code)
+ except ValueError:
+ status_code = 503
+ self.status_code = status_code
+ self.name = name
+ self.description = description
+
+
+def _hits_total_int(val: Any) -> int:
+ """
+ Compatibility hack between ES 6.x and 7.x. In ES 6.x, total is returned as
+ an int in many places, in ES 7 as a dict (JSON object) with 'value' key
+ """
+ if isinstance(val, int):
+ return val
+ else:
+ return int(val["value"])
+
+
+def results_to_dict(response: elasticsearch_dsl.response.Response) -> List[dict]:
+ """
+ Takes a response and returns all the hits as JSON objects.
+
+ Also handles surrogate strings that elasticsearch returns sometimes,
+ probably due to mangled data processing in some pipeline. "Crimes against
+ Unicode"; production workaround
+ """
+
+ results = []
+ for h in response:
+ r = h._d_
+ # print(h.meta._d_)
+ results.append(r)
+
+ for h in results:
+ for key in h:
+ if type(h[key]) is str:
+ h[key] = h[key].encode("utf8", "ignore").decode("utf8")
+ return results
+
+
+def wrap_es_execution(search: Search) -> Any:
+ """
+ Executes a Search object, and converts various ES error types into
+ something we can pretty print to the user.
+ """
+ try:
+ resp = search.execute()
+ except elasticsearch.exceptions.RequestError as e:
+ # this is a "user" error
+ print("elasticsearch 400: " + str(e.info), file=sys.stderr)
+ description = None
+ assert isinstance(e.info, dict)
+ if e.info.get("error", {}).get("root_cause", {}):
+ description = str(e.info["error"]["root_cause"][0].get("reason"))
+ raise FatcatSearchError(e.status_code, str(e.error), description)
+ except elasticsearch.exceptions.ConnectionError as e:
+ raise FatcatSearchError(e.status_code, "ConnectionError: search engine not available")
+ except elasticsearch.exceptions.TransportError as e:
+ # all other errors
+ print("elasticsearch non-200 status code: {}".format(e.info), file=sys.stderr)
+ description = None
+ assert isinstance(e.info, dict)
+ if e.info and e.info.get("error", {}).get("root_cause", {}):
+ description = str(e.info["error"]["root_cause"][0].get("reason"))
+ raise FatcatSearchError(e.status_code, str(e.error), description)
+ return resp
+
+
+def agg_to_dict(agg: Any) -> Dict[str, Any]:
+ """
+ Takes a simple term aggregation result (with buckets) and returns a simple
+ dict with keys as terms and counts as values. Includes an extra value
+ '_other', and by convention aggregations should be written to have "missing"
+ values as '_unknown'.
+ """
+ result = dict()
+ for bucket in agg.buckets:
+ result[bucket.key] = bucket.doc_count
+ if agg.sum_other_doc_count:
+ result["_other"] = agg.sum_other_doc_count
+ return result
diff --git a/python/fatcat_tools/search/stats.py b/python/fatcat_tools/search/stats.py
new file mode 100644
index 00000000..5496b94a
--- /dev/null
+++ b/python/fatcat_tools/search/stats.py
@@ -0,0 +1,87 @@
+from typing import Any, Dict
+
+import elasticsearch
+from elasticsearch_dsl import Search
+
+from fatcat_tools.search.common import _hits_total_int, agg_to_dict, wrap_es_execution
+
+
+def query_es_container_stats(
+ ident: str,
+ es_client: elasticsearch.Elasticsearch,
+ es_index: str = "fatcat_release",
+ merge_shadows: bool = False,
+) -> Dict[str, Any]:
+ """
+ Returns dict:
+ ident
+ total: count
+ in_web: count
+ in_kbart: count
+ is_preserved: count
+ preservation{}
+ "histogram" by preservation status
+ release_type{}
+ "histogram" by release type
+ """
+
+ search = Search(using=es_client, index=es_index)
+ search = search.query(
+ "term",
+ container_id=ident,
+ )
+ search.aggs.bucket(
+ "container_stats",
+ "filters",
+ filters={
+ "in_web": {
+ "term": {"in_web": True},
+ },
+ "in_kbart": {
+ "term": {"in_kbart": True},
+ },
+ "is_preserved": {
+ "term": {"is_preserved": True},
+ },
+ },
+ )
+ search.aggs.bucket(
+ "preservation",
+ "terms",
+ field="preservation",
+ missing="_unknown",
+ )
+ search.aggs.bucket(
+ "release_type",
+ "terms",
+ field="release_type",
+ missing="_unknown",
+ )
+
+ search = search[:0]
+
+ search = search.params(request_cache=True)
+ search = search.params(track_total_hits=True)
+ resp = wrap_es_execution(search)
+
+ container_stats = resp.aggregations.container_stats.buckets
+ preservation_bucket = agg_to_dict(resp.aggregations.preservation)
+ preservation_bucket["total"] = _hits_total_int(resp.hits.total)
+ for k in ("bright", "dark", "shadows_only", "none"):
+ if k not in preservation_bucket:
+ preservation_bucket[k] = 0
+ if merge_shadows:
+ preservation_bucket["none"] += preservation_bucket["shadows_only"]
+ preservation_bucket["shadows_only"] = 0
+ release_type_bucket = agg_to_dict(resp.aggregations.release_type)
+ stats = {
+ "ident": ident,
+ "total": _hits_total_int(resp.hits.total),
+ "in_web": container_stats["in_web"]["doc_count"],
+ "in_kbart": container_stats["in_kbart"]["doc_count"],
+ "is_preserved": container_stats["is_preserved"]["doc_count"],
+ "preservation": preservation_bucket,
+ "release_type": release_type_bucket,
+ }
+
+ return stats
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index bfadea64..79071810 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -14,13 +14,13 @@ from fatcat_openapi_client import (
)
from fatcat_tools import entity_from_json, public_api
+from fatcat_tools.search.stats import query_es_container_stats
from fatcat_tools.transforms import (
changelog_to_elasticsearch,
container_to_elasticsearch,
file_to_elasticsearch,
release_to_elasticsearch,
)
-from fatcat_web.search import get_elastic_container_stats
from .worker_common import FatcatWorker
@@ -156,7 +156,7 @@ class ElasticsearchReleaseWorker(FatcatWorker):
continue
if self.entity_type == ContainerEntity and self.query_stats:
- stats = get_elastic_container_stats(
+ stats = query_es_container_stats(
entity.ident,
es_client=es_client,
es_index=self.elasticsearch_release_index,
diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py
index 67bf56c5..4f28951c 100755
--- a/python/fatcat_transform.py
+++ b/python/fatcat_transform.py
@@ -13,6 +13,7 @@ import elasticsearch
from fatcat_openapi_client import ChangelogEntry, ContainerEntity, FileEntity, ReleaseEntity
from fatcat_tools import public_api
+from fatcat_tools.search.stats import query_es_container_stats
from fatcat_tools.transforms import (
changelog_to_elasticsearch,
citeproc_csl,
@@ -22,7 +23,6 @@ from fatcat_tools.transforms import (
release_to_csl,
release_to_elasticsearch,
)
-from fatcat_web.search import get_elastic_container_stats
def run_elasticsearch_releases(args: argparse.Namespace) -> None:
@@ -50,7 +50,7 @@ def run_elasticsearch_containers(args: argparse.Namespace) -> None:
if args.query_stats:
es_doc = container_to_elasticsearch(
entity,
- stats=get_elastic_container_stats(
+ stats=query_es_container_stats(
entity.ident,
es_client=es_client,
es_index=es_release_index,
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 186166bd..1f6602d4 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -30,6 +30,7 @@ from fatcat_tools.normal import (
clean_sha1,
clean_sha256,
)
+from fatcat_tools.search.common import FatcatSearchError
from fatcat_tools.transforms import citeproc_csl, release_to_csl
from fatcat_web import AnyResponse, Config, api, app, auth_api, mwoauth, priv_api
from fatcat_web.auth import (
@@ -55,11 +56,11 @@ from fatcat_web.graphics import (
)
from fatcat_web.kafka import kafka_pixy_produce
from fatcat_web.search import (
- FatcatSearchError,
GenericQuery,
ReleaseQuery,
do_container_search,
do_release_search,
+ get_elastic_container_browse_year_volume_issue,
get_elastic_container_histogram_legacy,
get_elastic_container_preservation_by_volume,
get_elastic_container_random_releases,
@@ -265,6 +266,8 @@ def work_lookup() -> AnyResponse:
### More Generic Entity Views ###############################################
+GENERIC_ENTITY_FIELDS = ["extra", "edit_extra", "revision", "redirect", "state", "ident"]
+
def generic_entity_view(entity_type: str, ident: str, view_template: str) -> AnyResponse:
entity = generic_get_entity(entity_type, ident)
@@ -275,7 +278,8 @@ def generic_entity_view(entity_type: str, ident: str, view_template: str) -> Any
return render_template("deleted_entity.html", entity_type=entity_type, entity=entity)
metadata = entity.to_dict()
- metadata.pop("extra")
+ for k in GENERIC_ENTITY_FIELDS:
+ metadata.pop(k)
entity._metadata = metadata
if view_template == "container_view.html":
@@ -298,7 +302,8 @@ def generic_entity_revision_view(
entity = generic_get_entity_revision(entity_type, revision_id)
metadata = entity.to_dict()
- metadata.pop("extra")
+ for k in GENERIC_ENTITY_FIELDS:
+ metadata.pop(k)
entity._metadata = metadata
return render_template(
@@ -322,7 +327,8 @@ def generic_editgroup_entity_view(
)
metadata = entity.to_dict()
- metadata.pop("extra")
+ for k in GENERIC_ENTITY_FIELDS:
+ metadata.pop(k)
entity._metadata = metadata
return render_template(
@@ -346,6 +352,106 @@ def container_view_coverage(ident: str) -> AnyResponse:
return generic_entity_view("container", ident, "container_view_coverage.html")
+@app.route("/container/<string(length=26):ident>/browse", methods=["GET"])
+def container_view_browse(ident: str) -> AnyResponse:
+ entity = generic_get_entity("container", ident)
+
+ if entity.state == "redirect":
+ return redirect(f"/container/{entity.redirect}")
+ elif entity.state == "deleted":
+ return render_template("deleted_entity.html", entity_type="container", entity=entity)
+
+ query_sort: Optional[List[str]]
+ if request.args.get("year") and "volume" in request.args and "issue" in request.args:
+ # year, volume, issue specified; browse-by-page
+ year = int(request.args["year"])
+ volume = request.args.get("volume", "")
+ issue = request.args.get("issue", "")
+ if volume:
+ volume = f'volume:"{volume}"'
+ else:
+ volume = "!volume:*"
+ if issue:
+ issue = f'issue:"{issue}"'
+ else:
+ issue = "!issue:*"
+ query_string = f"year:{year} {volume} {issue}"
+ query_sort = ["first_page", "pages", "release_date"]
+ elif request.args.get("year") and "volume" in request.args:
+ # year, volume specified (no issue); browse-by-page
+ year = int(request.args["year"])
+ volume = request.args.get("volume", "")
+ if volume:
+ volume = f'volume:"{volume}"'
+ else:
+ volume = "!volume:*"
+ query_string = f"year:{year} {volume}"
+ query_sort = ["issue", "first_page", "pages", "release_date"]
+ elif request.args.get("year"):
+ # year specified, not anything else; browse-by-date
+ year = int(request.args["year"])
+ query_string = f"year:{year}"
+ query_sort = ["release_date"]
+ elif request.args.get("volume"):
+ # volume specified, not anything else; browse-by-page
+ query_string = f'volume:{request.args["volume"]}'
+ query_sort = ["issue", "first_page", "pages", "release_date"]
+ else:
+ entity._browse_year_volume_issue = get_elastic_container_browse_year_volume_issue(
+ entity.ident
+ )
+ # print(entity._browse_year_volume_issue)
+ return render_template(
+ "container_view_browse.html",
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ )
+
+ # print(query_string)
+ query = ReleaseQuery(
+ q=query_string,
+ limit=300,
+ offset=0,
+ container_id=ident,
+ fulltext_only=False,
+ recent=False,
+ exclude_stubs=True,
+ sort=query_sort,
+ )
+
+ try:
+ found = do_release_search(query)
+ except FatcatSearchError as fse:
+ return (
+ render_template(
+ "container_view_search.html",
+ query=query,
+ es_error=fse,
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ ),
+ fse.status_code,
+ )
+
+ # HACK: re-sort by first page *numerically*
+ if found.results and query_sort and "first_page" in query_sort:
+ for doc in found.results:
+ if doc.get("first_page") and doc["first_page"].isdigit():
+ doc["first_page"] = int(doc["first_page"])
+ found.results = sorted(found.results, key=lambda d: d.get("first_page") or 99999999)
+
+ return render_template(
+ "container_view_browse.html",
+ query=query,
+ releases_found=found,
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ )
+
+
@app.route("/container/<string(length=26):ident>/metadata", methods=["GET"])
def container_view_metadata(ident: str) -> AnyResponse:
return generic_entity_view("container", ident, "entity_view_metadata.html")
@@ -984,8 +1090,17 @@ def release_search() -> AnyResponse:
if "q" not in request.args.keys():
return render_template("release_search.html", query=ReleaseQuery(), found=None)
+ # if this is a "generic" query (eg, from front page or top-of-page bar),
+ # and the query is not all filters/parameters (aka, there is an actual
+ # term/phrase in the query), then also try querying containers, and display
+ # a "were you looking for" box with a single result
container_found = None
- if request.args.get("generic"):
+ filter_only_query = True
+ for p in request.args.get("q", "").split():
+ if ":" not in p:
+ filter_only_query = False
+ break
+ if request.args.get("generic") and not filter_only_query:
container_query = GenericQuery.from_args(request.args)
container_query.limit = 1
try:
@@ -1079,6 +1194,52 @@ def coverage_search() -> AnyResponse:
)
+@app.route("/container/<string(length=26):ident>/search", methods=["GET", "POST"])
+def container_view_search(ident: str) -> AnyResponse:
+ entity = generic_get_entity("container", ident)
+
+ if entity.state == "redirect":
+ return redirect(f"/container/{entity.redirect}")
+ elif entity.state == "deleted":
+ return render_template("deleted_entity.html", entity_type="container", entity=entity)
+
+ if "q" not in request.args.keys():
+ return render_template(
+ "container_view_search.html",
+ query=ReleaseQuery(),
+ found=None,
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ )
+
+ query = ReleaseQuery.from_args(request.args)
+ query.container_id = ident
+ try:
+ found = do_release_search(query)
+ except FatcatSearchError as fse:
+ return (
+ render_template(
+ "container_view_search.html",
+ query=query,
+ es_error=fse,
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ ),
+ fse.status_code,
+ )
+
+ return render_template(
+ "container_view_search.html",
+ query=query,
+ found=found,
+ entity_type="container",
+ entity=entity,
+ editgroup_id=None,
+ )
+
+
def get_changelog_stats() -> Dict[str, Any]:
stats = {}
latest_changelog = api.get_changelog(limit=1)[0]
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 8cbe09f6..913d6696 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -4,31 +4,22 @@ the formal API)
"""
import datetime
-import sys
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
import elasticsearch
-import elasticsearch_dsl.response
from elasticsearch_dsl import Q, Search
+from fatcat_tools.search.common import (
+ _hits_total_int,
+ agg_to_dict,
+ results_to_dict,
+ wrap_es_execution,
+)
+from fatcat_tools.search.stats import query_es_container_stats
from fatcat_web import app
-class FatcatSearchError(Exception):
- def __init__(self, status_code: Union[int, str], name: str, description: str = None):
- if status_code == "TIMEOUT":
- status_code = 504
- elif isinstance(status_code, str):
- try:
- status_code = int(status_code)
- except ValueError:
- status_code = 503
- self.status_code = status_code
- self.name = name
- self.description = description
-
-
@dataclass
class ReleaseQuery:
q: Optional[str] = None
@@ -38,22 +29,13 @@ class ReleaseQuery:
container_id: Optional[str] = None
recent: bool = False
exclude_stubs: bool = False
+ sort: Optional[List[str]] = None
@staticmethod
def from_args(args: Dict[str, Any]) -> "ReleaseQuery":
query_str = args.get("q") or "*"
- container_id = args.get("container_id")
- # TODO: as filter, not in query string
- if container_id:
- query_str += ' container_id:"{}"'.format(container_id)
-
- # TODO: where are container_issnl queries actually used?
- issnl = args.get("container_issnl")
- if issnl and query_str:
- query_str += ' container_issnl:"{}"'.format(issnl)
-
offset = args.get("offset", "0")
offset = max(0, int(offset)) if offset.isnumeric() else 0
@@ -61,9 +43,10 @@ class ReleaseQuery:
q=query_str,
offset=offset,
fulltext_only=bool(args.get("fulltext_only")),
- container_id=container_id,
+ container_id=args.get("container_id"),
recent=bool(args.get("recent")),
exclude_stubs=bool(args.get("exclude_stubs")),
+ sort=None,
)
@@ -98,87 +81,11 @@ class SearchHits:
results: List[Any]
-def _hits_total_int(val: Any) -> int:
- """
- Compatibility hack between ES 6.x and 7.x. In ES 6x, total is returned as
- an int in many places, in ES 7 as a dict (JSON object) with 'value' key
- """
- if isinstance(val, int):
- return val
- else:
- return int(val["value"])
-
-
-def results_to_dict(response: elasticsearch_dsl.response.Response) -> List[dict]:
- """
- Takes a response returns all the hits as JSON objects.
-
- Also handles surrogate strings that elasticsearch returns sometimes,
- probably due to mangled data processing in some pipeline. "Crimes against
- Unicode"; production workaround
- """
-
- results = []
- for h in response:
- r = h._d_
- # print(h.meta._d_)
- results.append(r)
-
- for h in results:
- for key in h:
- if type(h[key]) is str:
- h[key] = h[key].encode("utf8", "ignore").decode("utf8")
- return results
-
-
-def wrap_es_execution(search: Search) -> Any:
- """
- Executes a Search object, and converts various ES error types into
- something we can pretty print to the user.
- """
- try:
- resp = search.execute()
- except elasticsearch.exceptions.RequestError as e:
- # this is a "user" error
- print("elasticsearch 400: " + str(e.info), file=sys.stderr)
- description = None
- assert isinstance(e.info, dict)
- if e.info.get("error", {}).get("root_cause", {}):
- description = str(e.info["error"]["root_cause"][0].get("reason"))
- raise FatcatSearchError(e.status_code, str(e.error), description)
- except elasticsearch.exceptions.ConnectionError as e:
- raise FatcatSearchError(e.status_code, "ConnectionError: search engine not available")
- except elasticsearch.exceptions.TransportError as e:
- # all other errors
- print("elasticsearch non-200 status code: {}".format(e.info), file=sys.stderr)
- description = None
- assert isinstance(e.info, dict)
- if e.info and e.info.get("error", {}).get("root_cause", {}):
- description = str(e.info["error"]["root_cause"][0].get("reason"))
- raise FatcatSearchError(e.status_code, str(e.error), description)
- return resp
-
-
-def agg_to_dict(agg: Any) -> Dict[str, Any]:
- """
- Takes a simple term aggregation result (with buckets) and returns a simple
- dict with keys as terms and counts as values. Includes an extra value
- '_other', and by convention aggregations should be written to have "missing"
- values as '_unknown'.
- """
- result = dict()
- for bucket in agg.buckets:
- result[bucket.key] = bucket.doc_count
- if agg.sum_other_doc_count:
- result["_other"] = agg.sum_other_doc_count
- return result
-
-
def do_container_search(query: GenericQuery, deep_page_limit: int = 2000) -> SearchHits:
search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_CONTAINER_INDEX"])
- search = search.query(
+ basic_query = Q(
"query_string",
query=query.q,
default_operator="AND",
@@ -188,8 +95,22 @@ def do_container_search(query: GenericQuery, deep_page_limit: int = 2000) -> Sea
fields=["biblio"],
)
+ search = search.query(
+ "boosting",
+ positive=Q(
+ "bool",
+ must=basic_query,
+ should=[
+ Q("range", releases_total={"gte": 500}),
+ Q("range", releases_total={"gte": 5000}),
+ ],
+ ),
+ negative=Q("term", releases_total=0),
+ negative_boost=0.5,
+ )
+
# Sanity checks
- limit = min((int(query.limit or 25), 100))
+ limit = min((int(query.limit or 25), 300))
offset = max((int(query.offset or 0), 0))
if offset > deep_page_limit:
# Avoid deep paging problem.
@@ -249,6 +170,9 @@ def do_release_search(query: ReleaseQuery, deep_page_limit: int = 2000) -> Searc
],
)
+ if query.container_id:
+ search = search.filter("term", container_id=query.container_id)
+
search = search.query(
"boosting",
positive=Q(
@@ -260,8 +184,11 @@ def do_release_search(query: ReleaseQuery, deep_page_limit: int = 2000) -> Searc
negative_boost=0.5,
)
+ if query.sort:
+ search = search.sort(*query.sort)
+
# Sanity checks
- limit = min((int(query.limit or 25), 100))
+ limit = min((int(query.limit or 25), 300))
offset = max((int(query.offset or 0), 0))
if offset > deep_page_limit:
# Avoid deep paging problem.
@@ -320,6 +247,122 @@ def get_elastic_container_random_releases(ident: str, limit: int = 5) -> List[Di
return results
+def _sort_vol_key(val: Optional[Any]) -> Tuple[bool, bool, int, str]:
+ """
+ Helper for sorting volume and issue strings. Defined order is:
+
+ - None values first
+ - any non-integers next, in string (lexicographic) order
+ - any integers next, in integer sorted order (ascending)
+
+ Note that the actual sort used/displayed is reversed.
+
+ TODO: 'val' should actually be Optional[str], but getting a mypy error I
+ don't know how to hack around quickly right now.
+ """
+ if val is None:
+ return (False, False, 0, "")
+ if val.isdigit():
+ return (True, True, int(val), "")
+ else:
+ return (True, False, 0, val)
+
+
+def get_elastic_container_browse_year_volume_issue(ident: str) -> List[Dict[str, Any]]:
+ """
+ Returns a set of histogram buckets, as nested dicts/lists:
+
+ [
+ { year: int,
+ volumes: [
+ { volume: str|None
+ issues: [
+ { issue: str|None
+ count: int
+ }
+ ] }
+ ] }
+ ]
+ """
+
+ search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"])
+ search = search.query(
+ "bool",
+ filter=[Q("bool", must_not=[Q("match", release_type="stub")])],
+ )
+ search = search.filter("term", container_id=ident)
+ search.aggs.bucket(
+ "year_volume",
+ "composite",
+ size=1500,
+ sources=[
+ {
+ "year": {
+ "histogram": {
+ "field": "release_year",
+ "interval": 1,
+ "missing_bucket": True,
+ },
+ }
+ },
+ {
+ "volume": {
+ "terms": {
+ "field": "volume",
+ "missing_bucket": True,
+ },
+ }
+ },
+ {
+ "issue": {
+ "terms": {
+ "field": "issue",
+ "missing_bucket": True,
+ },
+ }
+ },
+ ],
+ )
+ search = search[:0]
+ search = search.params(request_cache=True)
+ resp = wrap_es_execution(search)
+ buckets = resp.aggregations.year_volume.buckets
+ # print(buckets)
+ buckets = [h for h in buckets if h["key"]["year"]]
+ year_nums = set([int(h["key"]["year"]) for h in buckets])
+ year_dicts: Dict[int, Dict[str, Any]] = dict()
+ if year_nums:
+ for year in year_nums:
+ year_dicts[year] = {}
+ for row in buckets:
+ year = int(row["key"]["year"])
+ volume = row["key"]["volume"] or ""
+ issue = row["key"]["issue"] or ""
+ if volume not in year_dicts[year]:
+ year_dicts[year][volume] = {}
+ year_dicts[year][volume][issue] = int(row["doc_count"])
+
+ # transform to lists-of-dicts
+ year_list = []
+ for year in year_dicts.keys():
+ volume_list = []
+ for volume in year_dicts[year].keys():
+ issue_list = []
+ for issue in year_dicts[year][volume].keys():
+ issue_list.append(
+ dict(issue=issue or None, count=year_dicts[year][volume][issue])
+ )
+ issue_list = sorted(
+ issue_list, key=lambda x: _sort_vol_key(x["issue"]), reverse=True
+ )
+ volume_list.append(dict(volume=volume or None, issues=issue_list))
+ volume_list = sorted(
+ volume_list, key=lambda x: _sort_vol_key(x["volume"]), reverse=True
+ )
+ year_list.append(dict(year=year, volumes=volume_list))
+ return sorted(year_list, key=lambda x: x["year"], reverse=True)
+
+
def get_elastic_entity_stats() -> dict:
"""
TODO: files, filesets, webcaptures (no schema yet)
@@ -465,6 +508,9 @@ def get_elastic_container_stats(
merge_shadows: Optional[bool] = None,
) -> Dict[str, Any]:
"""
+ This is a DEPRECATED backwards-compatibility wrapper around the new
+ query_es_container_stats() method from fatcat_tools.
+
Returns dict:
ident
issnl (optional)
@@ -485,66 +531,13 @@ def get_elastic_container_stats(
if merge_shadows is None:
merge_shadows = app.config["FATCAT_MERGE_SHADOW_PRESERVATION"]
- search = Search(using=es_client, index=es_index)
- search = search.query(
- "term",
- container_id=ident,
- )
- search.aggs.bucket(
- "container_stats",
- "filters",
- filters={
- "in_web": {
- "term": {"in_web": True},
- },
- "in_kbart": {
- "term": {"in_kbart": True},
- },
- "is_preserved": {
- "term": {"is_preserved": True},
- },
- },
+ stats = query_es_container_stats(
+ ident=ident,
+ es_client=es_client,
+ es_index=es_index,
+ merge_shadows=merge_shadows,
)
- search.aggs.bucket(
- "preservation",
- "terms",
- field="preservation",
- missing="_unknown",
- )
- search.aggs.bucket(
- "release_type",
- "terms",
- field="release_type",
- missing="_unknown",
- )
-
- search = search[:0]
-
- search = search.params(request_cache=True)
- search = search.params(track_total_hits=True)
- resp = wrap_es_execution(search)
-
- container_stats = resp.aggregations.container_stats.buckets
- preservation_bucket = agg_to_dict(resp.aggregations.preservation)
- preservation_bucket["total"] = _hits_total_int(resp.hits.total)
- for k in ("bright", "dark", "shadows_only", "none"):
- if k not in preservation_bucket:
- preservation_bucket[k] = 0
- if merge_shadows:
- preservation_bucket["none"] += preservation_bucket["shadows_only"]
- preservation_bucket["shadows_only"] = 0
- release_type_bucket = agg_to_dict(resp.aggregations.release_type)
- stats = {
- "ident": ident,
- "issnl": issnl,
- "total": _hits_total_int(resp.hits.total),
- "in_web": container_stats["in_web"]["doc_count"],
- "in_kbart": container_stats["in_kbart"]["doc_count"],
- "is_preserved": container_stats["is_preserved"]["doc_count"],
- "preservation": preservation_bucket,
- "release_type": release_type_bucket,
- }
-
+ stats["issnl"] = issnl
return stats
@@ -643,11 +636,7 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any]
"biblio",
],
)
- if query.container_id:
- search = search.filter(
- "term",
- container_id=query.container_id,
- )
+ search = search.filter("term", container_id=query.container_id)
if query.exclude_stubs:
search = search.query(
"bool",
@@ -909,17 +898,7 @@ def get_elastic_preservation_by_type(query: ReleaseQuery) -> List[dict]:
],
)
if query.container_id:
- search = search.query(
- "bool",
- filter=[
- Q(
- "bool",
- must=[
- Q("match", container_id=query.container_id),
- ],
- ),
- ],
- )
+ search = search.filter("term", container_id=query.container_id)
if query.recent:
date_today = datetime.date.today()
start_date = str(date_today - datetime.timedelta(days=60))
diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html
index 0eeed55e..0031a0a8 100644
--- a/python/fatcat_web/templates/container_search.html
+++ b/python/fatcat_web/templates/container_search.html
@@ -33,10 +33,16 @@
{% if found %}
{% if found.results %}
- {{ search_macros.top_results(query, found) }}
+ <div>
+ <div class="mobile-hide" style="float: right; margin-left: 0.5em; margin-top:0px; width: 150px; text-align: right;">
+ <i>Content Status</i>
+ </div>
+ {{ search_macros.top_results(query, found) }}
+ </div>
+ <br clear="all">
{% for entity in found.results %}
- {{ entity_macros.container_search_result_row(entity) }}
+ {{ entity_macros.container_search_result_row(entity, show_stats=True) }}
{% endfor %}
{% if found.results|length > 8 %}
diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html
index c5f68367..0432a12d 100644
--- a/python/fatcat_web/templates/container_view.html
+++ b/python/fatcat_web/templates/container_view.html
@@ -8,42 +8,47 @@
<div class="ui stackable mobile reversed grid centered">
<div class="column" style="font-size: 16px; flex: 1;">
-{% if (container.extra != None) and (container.extra['urls'] != None) and (container.extra['urls']|length > 0) %}
- <div style="text-overflow: ellipsis; overflow: hidden; width: 100%;">
- <a href="{{ container.extra['urls'][0] }}"><code>{{ container.extra['urls'][0] }}</code></a>
- </div>
-{% elif (container.extra != None) and (container.extra['webarchive_urls'] != None) and (container.extra['webarchive_urls']|length > 0) %}
-{# fallback to a webarchive URL if no valid/live homepage URL #}
- <div style="text-overflow: ellipsis; overflow: hidden; width: 100%;">
- <a href="{{ container.extra['webarchive_urls'][0] }}"><code>{{ container.extra['webarchive_urls'][0] }}</code></a>
- </div>
-{% endif %}
-<p>
-{% if container.publisher != None %}
- Published by {{ container.publisher }}
+{% if container.container_type %}
+ {% set pub_type_phrase = container.container_type %}
+{% else %}
+ {% set pub_type_phrase = '"container" (a publication venue)' %}
{% endif %}
-{% if container.state == "active" %}
-<h3>Search Releases from this Container</h3>
-<form class="" role="search" action="/release/search" method="get">
+{% if container.state == 'active' %}
+<form class="" role="search" action="/container/{{ container.ident }}/search" method="get">
<div class="ui form">
<div class="ui action input large fluid">
<input type="text" placeholder="Search Articles..." name="q" aria-label="search release metadata">
- <input type="hidden" name="container_id" value="{{ container.ident }}">
<button class="ui button">Search</button>
</div>
</div>
</form>
{% endif %}
+{% if (container.extra != None) and (container.extra['urls'] or container.extra['webarchive_urls']) %}
+ <h3 class="ui dividing header">Homepage URLs</h3>
+ <table class="ui very basic compact small single line fixed table">
+ <tbody>
+ {% for url in container.extra['urls'] or [] %}
+ <tr><td><a href="{{ url }}" rel="nofollow"><code>{{ url }}</code></a></td>
+ {% endfor %}
+ {% for url in container.extra['webarchive_urls'] or [] %}
+ <tr><td><a href="{{ url }}" rel="nofollow"><code>{{ url }}</code></a></td>
+ {% endfor %}
+ </tbody>
+ </table>
+{% endif %}
+
{% if container._random_releases %}
-<h3>Example Publications</h3>
+<h3 class="ui dividing header">Example Publications</h3>
{% for random_release in container._random_releases %}
{{ entity_macros.release_search_result_row(random_release) }}
{% endfor %}
{% endif %}
</div>
+
+{# start right-hand entity column #}
<div class="column" style="flex: 0 0 24em;">
{% if container._stats %}
@@ -55,37 +60,45 @@
</div>
</div>
</div>
+{% endif %}
{% if container._es and container._es.is_oa == True %}
<div class="ui segment center aligned attached">
<i class="icon unlock large orange"></i><b>Open Access Publication</b>
</div>
+{% endif %}
+{% if container._stats and container._stats.total >= 1 %}
+<div class="ui segment attached">
+ <b>Preservation Summary <a href="/container/{{ container.ident }}/coverage">[more]</a></b><br>
+ {{ entity_macros.preservation_bar(container._stats.preservation) }}
+ {{ entity_macros.preservation_small_table(container._stats.preservation) }}
+</div>
{% endif %}
-{% if container._stats.total >= 1 %}
- <div class="ui segment attached">
- <b>Preservation Status</b><br>
- {{ entity_macros.preservation_bar(container._stats.preservation) }}
- {{ entity_macros.preservation_small_table(container._stats.preservation) }}
- </div>
- <div class="ui segment attached">
- <b>Work Types</b><br>
- <table class="ui very basic very compact collapsing table">
+
+{% if container._stats and container._stats.total >= 1 %}
+<div class="ui segment attached">
+ <b>Release Types</b><br>
+ {% if container._stats.total >= 1 %}
+ <table class="ui very basic very compact collapsing table" style="margin-top: 0px;">
<tbody>
{% for type_row in container._stats.release_type %}
<tr>
<td class="three wide">
+ {% if type_row == "_unknown" %}<i>unknown-type</i>{% else %}<code>{{ type_row }}</code>{% endif %}
+ <td class="three wide right aligned">
{% if type_row == "_unknown" %}
- <i>Unknown</i>
+ <a href="/container/{{ container.ident }}/search?q=!release_type:*">
{% else %}
- <code>{{ type_row }}</code>
+ <a href="/container/{{ container.ident }}/search?q=release_type:{{ type_row }}">
{% endif %}
- <td class="three wide right aligned">{{ "{:,}".format(container._stats.release_type[type_row]) }}
+ {{ "{:,}".format(container._stats.release_type[type_row]) }}
+ </a>
{% endfor %}
</tbody>
</table>
- </div>
-{% endif %}
+ {% endif %}
+</div>
{% endif %}
{% if container.container_type != None or container.publication_status != None %}
@@ -96,6 +109,9 @@
{% if container.publication_status != None %}
<b>Publication Status</b> &nbsp;<code>{{ container.publication_status or 'unknown' }}</code><br>
{% endif %}
+ {% if container.extra and container.extra.country %}
+ <b>Country Code</b> &nbsp;<code>{{ container.extra.country }}</code><br>
+ {% endif %}
</div>
{% endif %}
@@ -103,17 +119,22 @@
<div class="ui segment attached">
{% if container.issnl != None %}
<b>ISSN-L<sup><a href="https://en.wikipedia.org/wiki/International_Standard_Serial_Number#Linking_ISSN">?</a></sup></b>
- &nbsp;<code>{{ container.issnl }}</code>
+ &nbsp;<code><a href="https://portal.issn.org/resource/issn/{{ container.issnl }}">{{ container.issnl }}</a></code><br>
{% endif %}
{% if container.issnp or (container.extra != None and container.extra.issnp != None and (container.extra.issnp|length > 0)) %}
- <br><i class="icon paperclip"></i>Print: &nbsp;<code>{{ container.issnp or container.extra.issnp }}</code>
+ &nbsp; &nbsp; <b>Print:</b> &nbsp;<code>{{ container.issnp or container.extra.issnp }}</code><br>
{% endif %}
{% if container.issne or (container.extra != None and container.extra.issne != None and (container.extra.issne|length > 0)) %}
- <br><i class="icon plug"></i>Electronic: &nbsp;<code>{{ container.issne or container.extra.issne }}</code>
+ &nbsp; &nbsp; <b>Electronic:</b> &nbsp;<code>{{ container.issne or container.extra.issne }}</code><br>
{% endif %}
- <br>
{% if container.wikidata_qid != None %}
- <b>Wikidata</b> &nbsp;<a href="https://wikidata.org/wiki/{{ container.wikidata_qid }}"><code>{{ container.wikidata_qid }}</code></a>
+ <b>Wikidata</b> &nbsp;<a href="https://wikidata.org/wiki/{{ container.wikidata_qid }}"><code>{{ container.wikidata_qid }}</code></a><br>
+ {% endif %}
+ {% if container.extra and container.extra.dblp %}
+ <b>dblp</b> &nbsp;<a href="https://dblp.org/{{ container.extra.dblp.prefix }}"><code>{{ container.extra.dblp.prefix }}</code></a><br>
+ {% endif %}
+ {% if container.extra and container.extra.ia and container.extra.ia.sim %}
+ <b>archive.org</b> &nbsp;<a href="https://archive.org/search.php?query=sim_pubid%3A{{ container.extra.ia.sim.sim_pubid }}%20mediatype%3Acollection"><code>sim_pubid:{{ container.extra.ia.sim.sim_pubid }}</code></a><br>
{% endif %}
</div>
{% endif %}
@@ -129,49 +150,11 @@
{% if container._es.in_road == True %}
<i class="icon check green"></i> In <a href="http://road.issn.org/issn/{{ container.issnl }}">ISSN ROAD</a><br>
- {% elif container._es.in_road == False %}
- <i class="icon times grey"></i> Not in <a href="https://road.issn.org">ISSN ROAD</a><br>
{% endif %}
- {% if container._es.any_kbart == True %}
- <i class="icon check green"></i> In <a href="https://keepers.issn.org/?q=api/search&search[]=MUST=allissn={{ container.issnl }}&search[]=MUST_EXIST=keepers">Keepers Registery</a>
- <br>
- {% elif container._es.any_kbart == False %}
- <i class="icon times grey"></i> Not in <a href="https://keepers.issn.org/?q=api/search&search[]=MUST=allissn={{ container.issnl }}&search[]=MUST_EXIST=keepers">Keepers Registry</a><br>
+ {% if container.extra and container.extra.szczepanski %}
+ <i class="icon check green"></i> In <a href="https://www.ebsco.com/open-access/szczepanski-list">Szczepanski List</a><br>
{% endif %}
-
- {% if container.extra and container.extra.sherpa_romeo and container.extra.sherpa_romeo.color %}
- SHERPA/RoMEO color: <code>{{ container.extra.sherpa_romeo.color }}</code>
- {% endif %}
-</div>
-{% endif %}
-
-{%- if container.extra and container.extra.kbart %}
-<div class="ui segment attached">
-<b>Preservation Holdings</b><br>
- {% for k, v in container.extra.kbart.items() %}
- {% if k == "lockss" %}
- <a href="https://www.lockss.org/">LOCKSS</a>:
- {% elif k == "clockss" %}
- <a href="https://clockss.org/">CLOCKSS</a>:
- {% elif k == "hathitrust" and container.issnl %}
- <a href="https://catalog.hathitrust.org/Search/Home?lookfor={{ container.issnl }}&searchtype=isn&ft=&setft=false">HathiTrust</a>:
- {% elif k == "portico" and container.issnl %}
- <a href="https://www.portico.org/coverage/titles/?keyword={{ container.issnl }}">Portico</a>:
- {% else %}
- <span style="text-transform: uppercase;">{{ k }}:</span>
- {% endif %}
- years
- {% for span in v.year_spans %}
- {% if span|length >= 2 -%}
- {{ span[0] }}-{{ span[1] -}}
- {% elif span|length == 1 -%}
- {{ span[0] -}}
- {% endif -%}
- {{- ", " if not loop.last }}
- {% endfor %}
- <br>
- {% endfor %}
</div>
{% endif %}
diff --git a/python/fatcat_web/templates/container_view_browse.html b/python/fatcat_web/templates/container_view_browse.html
new file mode 100644
index 00000000..d16502d0
--- /dev/null
+++ b/python/fatcat_web/templates/container_view_browse.html
@@ -0,0 +1,124 @@
+{% set container = entity %}
+{% set entity_view = "browse" %}
+{% set entity_type = "container" %}
+{% import "entity_macros.html" as entity_macros %}
+{% extends "entity_base.html" %}
+
+{% macro browse_year_volume_issue_table(entity, data) %}
+{# Renders a Year / Volume / Issue table for `entity`. `data` is iterated as year rows; each year exposes `.year` and `.volumes`, each volume exposes `.volume` and `.issues`, and each issue exposes `.issue` and `.count`. Year and volume cells use rowspan so each appears once per group. #}
+{# Volume and issue values are URL-encoded before being placed in query strings, so characters such as '&' or '#' cannot truncate or split the link. #}
+<table class="ui basic compact structured table">
+ <thead>
+ <tr>
+ <th>Year
+ <th>Volume
+ <th>Issue
+ <th class="right aligned">Indexed Content
+ </tr>
+ </thead>
+ <tbody>
+ {# NOTE: this section is pretty nested, with complex behavior; it could be hard to edit and understand #}
+ {# TODO: these "sorts" are lexical, not numeric, which causes problems #}
+ {% for year in data %}
+ {% for volume in year.volumes %}
+ {% set volume_loop = loop %}
+ {% for issue in volume.issues %}
+ {% set issue_loop = loop %}
+ <tr>
+ {# the year cell is emitted only on the first row of the year and spans every issue row of every volume in that year #}
+ {% if volume_loop.first and issue_loop.first %}
+ {% set year_rowspan = year.volumes|map(attribute='issues')|map('length')|sum %}
+ <td rowspan="{{ year_rowspan }}" class="top aligned">
+ <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}">{{ year.year }}</a>
+ </td>
+ {% endif %}
+
+ {# the volume cell is emitted only on the first issue row of the volume #}
+ {% if issue_loop.first %}
+ <td rowspan="{{ volume.issues|length }}" class="top aligned">
+ {% if volume.volume %}
+ <a href="/container/{{ entity.ident }}/browse?volume={{ volume.volume|urlencode }}">Vol. {{ volume.volume }}</a>
+ {% else %}
+ -
+ {% endif %}
+ </td>
+ {% endif %}
+
+ <td>
+ {% if issue.issue %}
+ <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}&volume={{ (volume.volume or '')|urlencode }}&issue={{ (issue.issue or '')|urlencode }}">Issue {{ issue.issue }}</a>
+ {% else %}
+ -
+ {% endif %}
+ </td>
+
+ <td class="right aligned">
+ <a href="/container/{{ entity.ident }}/browse?year={{ year.year }}&volume={{ (volume.volume or '')|urlencode }}&issue={{ (issue.issue or '')|urlencode }}">{{ "{:,}".format(issue.count) }} releases</a>
+ </td>
+ </tr>
+ {% endfor %}
+ {% endfor %}
+ {% endfor %}
+ </tbody>
+</table>
+{% endmacro %}
+
+{% macro browse_releases(found) %}
+ {# Renders the release listing for one browse selection. The heading is built from request.args: "Volume N[, Issue M][ (year)]" when a volume is given, otherwise "Year Y". #}
+ {# `found.results` is iterated as release documents; the first column shows `pages` when a volume filter is set and `release_date` otherwise, with "-" as the fallback. #}
+ {# NOTE: `request` and `entity` are not parameters; they are read from the enclosing template context. #}
+ <h2 style="margin-bottom: 1em;">
+ {% if request.args.volume %}
+ Volume {{ request.args.volume }}
+ {%- if request.args.issue %}, Issue {{ request.args.issue }}{% endif -%}
+ {%- if request.args.year %} ({{ request.args.year }}){% endif %}
+ {% else %}
+ Year {{ request.args.year }}
+ {% endif %}
+ </h2>
+ {% if not found.results %}
+ <p><i>No publications found!</i>
+ {% else %}
+ <table class="ui very basic compact structured table">
+ <thead>
+ <tr><th class="one wide center aligned">{% if request.args.volume %}Page(s){% else %}Date{% endif %}</th>
+ <th class="nine wide">Publication</th>
+ </thead>
+ <tbody>
+ {% for release_doc in found.results %}
+ <tr><td class="center aligned">
+ {% if request.args.volume %}
+ {% if release_doc.pages %}
+ {{ release_doc.pages }}
+ {% else %}
+ -
+ {% endif %}
+ {% elif release_doc.release_date %}
+ {{ release_doc.release_date }}
+ {% else %}
+ -
+ {% endif %}
+ </td>
+ <td>
+ {{ entity_macros.release_search_result_row(release_doc, margin_top=False) }}
+ </td>
+ {% endfor %}
+ </tbody>
+ </table>
+ {# tell the reader when the listing is truncated, i.e. more releases matched than were returned #}
+ {% if found.count_found > found.count_returned %}
+ <p><i>Showing only the first {{ found.count_returned }} out of {{ found.count_found }} releases</i>
+ {% endif %}
+ {# NOTE: the back link sits inside the else branch, so it is not shown when no publications were found #}
+ <p><i><a href="/container/{{ entity.ident }}/browse">Back to full listing</a></i>
+ {% endif %}
+{% endmacro %}
+
+{% block entity_main %}
+{# Shows the filtered release listing when `releases_found` is set; otherwise falls back to the year/volume/issue overview table when the entity carries `_browse_year_volume_issue`. Nothing is rendered when neither is available. #}
+
+{% if releases_found %}
+ {{ browse_releases(releases_found) }}
+{% elif entity._browse_year_volume_issue %}
+ <div class="ui container text">
+ <h3>Publications by Year, Volume, and Issue</h3>
+ <p>This table includes content which does not have article-level metadata
+ about volume or issue, but at least the year of publication must be known.
+ "Stub" releases (eg, spam or duplicate DOIs) are not listed.
+ {{ browse_year_volume_issue_table(entity, entity._browse_year_volume_issue) }}
+ </div>
+{% endif %}
+
+{% endblock %}
+
diff --git a/python/fatcat_web/templates/container_view_coverage.html b/python/fatcat_web/templates/container_view_coverage.html
index 3022c0d9..865723d5 100644
--- a/python/fatcat_web/templates/container_view_coverage.html
+++ b/python/fatcat_web/templates/container_view_coverage.html
@@ -6,35 +6,79 @@
{% block entity_main %}
-<div class="ui centered grid">
- <div class="row">
- <div class="twelve wide column">
- <div class="ui large horizontal statistic">
- <div class="value">{{ "{:,}".format(container._stats.total) }}</div>
- <div class="label" style="text-transform: none;">Known Releases</div>
- </div>
- {% if container._stats.total >= 1 %}
- {{ entity_macros.preservation_bar(container._stats.preservation, extra_class="large") }}
- {{ entity_macros.preservation_table(container._stats.preservation) }}
- {% endif %}
- <p>
- {% if container.extra and container.extra.kbart %}
- There seem to be at least some "dark" preservation holdings in:
- {% for k, v in container.extra.kbart.items() %}
- <code>{{ k }}</code>{{ ", " if not loop.last }}
- {% endfor %}
- .
- {% endif %}
- {% if container.issnl %}
- Our metadata may not be up to date, but you can verify preservation holdings in <a href="https://keepers.issn.org/?q=api/search&search[]=MUST=allissn={{ container.issnl }}&search[]=MUST_EXIST=keepers">Keepers Registery</a> (click "Archival Status").
- {% endif %}
- </div>
+<div class="ui container text" style="margin-bottom: 2em;">
+ <div class="ui message">
+ <p>This page summarizes and visualizes the article-level (or output-level) preservation status for a single publication venue (<i>{{ container.name }}</i>). Fatcat is a preservation-oriented catalog, and metadata is aggregated from many <a href="https://guide.fatcat.wiki/sources.html">sources</a>.
+ <p>However, metadata quality and consistency is difficult at scale and there may be preservation coverage not recorded here, or in some rare cases we may have content incorrectly matched and marked as preserved. Please <a href="/about">contact us</a> or <a href="https://guide.fatcat.wiki/editing_quickstart.html">submit corrections directly</a> if you find any mistakes or inaccuracies.
</div>
</div>
{% if container._stats.total >= 1 %}
- <br><br>
- <h2>Preservation Coverage by Year</h2>
+ <div class="ui container text" style="margin-bottom: 3em;">
+ <h2 class="ui header">Overall Preservation Coverage</h2>
+ {{ entity_macros.preservation_bar(container._stats.preservation, extra_class="large") }}
+ <br>
+ <center>
+ {{ entity_macros.preservation_table(container._stats.preservation) }}
+ </center>
+ </div>
+{% endif %}
+
+<div class="ui container text" style="margin-bottom: 3em;">
+ <h2 class="ui header">Known Holdings Elsewhere</h2>
+ <p>This table is based on KBART reports from large, independent, long-term digital preservation projects. We use the start and stop years of fulltext coverage, then count individual works as included or not on the basis of year alone (not considering volume or issue metadata). These are mostly "dark" archives, with no direct public access to holdings.
+ <p>The Keepers Registry project, currently run by <a href="https://issn.org">issn.org</a>, is a more authoritative source for aggregated KBART reports, and includes more archives.
+ {% if container.issnl %}
+ You can double check the <a href="https://portal.issn.org/?q=api/search&search[]=MUST=allissnbis=%22{{ container.issnl }}%22">Keeper's entry for this ISSN on portal.issn.org</a>; click through to the "Archival Status" link to see holdings information.
+ {% endif %}
+
+ {% if container.extra and container.extra.kbart %}
+ <table class="ui very compact table">
+ <thead>
+ <tr>
+ <th>Archive</th>
+ <th>Year Span(s)</th>
+ </thead>
+ <tbody>
+ {% for k, v in container.extra.kbart.items() %}
+ <tr>
+ <td>
+ {% if k == "lockss" %}
+ <a href="https://www.lockss.org/">LOCKSS</a>
+ {% elif k == "clockss" %}
+ <a href="https://clockss.org/">CLOCKSS</a>
+ {% elif k == "hathitrust" and container.issnl %}
+ <a href="https://catalog.hathitrust.org/Search/Home?lookfor={{ container.issnl }}&searchtype=isn&ft=&setft=false">HathiTrust</a>
+ {% elif k == "portico" and container.issnl %}
+ <a href="https://www.portico.org/coverage/titles/?keyword={{ container.issnl }}">Portico</a>
+ {% elif k == "scholarsportal" and container.issnl %}
+ <a href="https://journals.scholarsportal.info/search?q={{ container.issnl }}&search_in=ISSN">Scholars Portal</a>
+ {% else %}
+ <span style="text-transform: uppercase;">{{ k }}</span>
+ {% endif %}
+ </td>
+ <td>
+ {% for span in v.year_spans %}
+ {% if span|length >= 2 -%}
+ {{ span[0] }} to {{ span[1] -}}
+ {% elif span|length == 1 -%}
+ {{ span[0] -}}
+ {% endif -%}
+ <br>
+ {% endfor %}
+ {% if not v.year_spans %}-{% endif %}
+ {% endfor %}
+ </tbody>
+ </table>
+ {% else %}
+ <p>No holdings at any other locations recorded.
+ {% endif %}
+</div>
+
+{% if container._stats.total >= 1 %}
+ <div class="ui container text">
+ <h2 class="ui header">Preservation Coverage by Year</h2>
+ </div>
<figure style="margin: 0 0 0 0;">
<embed type="image/svg+xml" src="/container/{{ container.ident }}/preservation_by_year.svg" />
@@ -45,7 +89,9 @@
</div>
<br><br>
- <h2>Preservation Coverage by Volume</h2>
+ <div class="ui container text">
+ <h2 class="ui header">Preservation Coverage by Volume Number</h2>
+ </div>
<figure style="margin: 0 0 0 0;">
<embed type="image/svg+xml" src="/container/{{ container.ident }}/preservation_by_volume.svg" />
@@ -56,23 +102,25 @@
</div>
<br><br>
- <h2>Preservation Coverage by Release Type</h2>
- <table class="ui table">
- <thead>
- <tr>
- <th>Release Type
- <th class="right aligned">Total Count
- <th>Coverage
- </thead>
- <tbody>
- {% for type_row in container._type_preservation %}
- <tr>
- <td class="two wide">{{ type_row.release_type }}
- <td class="two wide right aligned">{{ "{:,}".format(type_row.total) }}
- <td class="twelve wide">{{ entity_macros.preservation_bar(type_row) }}
- {% endfor %}
- </tbody>
- </table>
+ <div class="ui container text">
+ <h2 class="ui header">Preservation Coverage by Release Type</h2>
+ <table class="ui table">
+ <thead>
+ <tr>
+ <th>Release Type
+ <th class="right aligned">Total Count
+ <th>Coverage
+ </thead>
+ <tbody>
+ {% for type_row in container._type_preservation %}
+ <tr>
+ <td class="three wide">{{ type_row.release_type }}
+ <td class="three wide right aligned">{{ "{:,}".format(type_row.total) }}
+ <td class="ten wide">{{ entity_macros.preservation_bar(type_row) }}
+ {% endfor %}
+ </tbody>
+ </table>
+ </div>
{% endif %}
{% endblock %}
diff --git a/python/fatcat_web/templates/container_view_search.html b/python/fatcat_web/templates/container_view_search.html
new file mode 100644
index 00000000..289c8dad
--- /dev/null
+++ b/python/fatcat_web/templates/container_view_search.html
@@ -0,0 +1,70 @@
+{% set container = entity %}
+{% set entity_view = "search" %}
+{% set entity_type = "container" %}
+{% import "entity_macros.html" as entity_macros %}
+{% import "search_macros.html" as search_macros %}
+{% extends "entity_base.html" %}
+
+{% block entity_main %}
+{# Container-scoped release search page: a search form posting back to this container's /search URL, followed by the result rows, a "no results" panel with links to external search engines, or an Elasticsearch error message. #}
+<div class="ui container text">
+
+<div class="ui message">
+<h3>Search inside Container</h3>
+<form class="" role="search" action="/container/{{ entity.ident }}/search" method="get">
+ <div class="ui form">
+ <div class="ui action input huge fluid">
+ <input type="text" placeholder="Query..." name="q" value="{% if query.q %}{{ query.q }}{% endif %}" aria-label="search release metadata">
+ <button class="ui primary button">Search</button>
+ </div>
+ {# the query is URL-encoded so characters such as '&' or '#' are carried over intact, matching the external search links further down #}
+ <br>Can also search <b><a href="/release/search?q={{ (query.q or '')|urlencode }}">all releases</a></b>.
+ </div>
+</form>
+</div>
+
+<br>
+{% if found %}
+ {% if found.results %}
+
+ {{ search_macros.top_results(query, found) }}
+
+ {% for paper in found.results %}
+ {{ entity_macros.release_search_result_row(paper) }}
+ {% endfor %}
+
+ {% if found.results|length > 8 %}
+ <div class="ui divider"></div>
+ <div style="text-align: center">
+ {# NOTE(review): pagination is generated for the 'release_search' endpoint, so following it presumably leaves the container-scoped search; confirm whether a container-specific endpoint should be used here #}
+ {{ search_macros.bottom_results(query, found, endpoint='release_search') }}
+ </div>
+ {% endif %}
+
+ {% else %}
+
+ Raw query was: <i>{{ query.q }}</i>
+
+ <div class="ui centered stackable grid" style="padding-top: 15%;">
+ <div class="row">
+ <div class="four wide column">
+ <img src="/static/paper_man_confused.gif" alt="confused paper man">
+ </div>
+ <div class="six wide column">
+ <h2>No results found!</h2>
+ <p>You could try elsewhere:</p>
+ <ul>
+ <li>Search <a href="https://dissem.in/search?q={{ query.q | urlencode }}">dissem.in</a></li>
+ <li>Search <a href="https://www.base-search.net/Search/Results?lookfor={{ query.q | urlencode }}">BASE</a></li>
+ <li>Search <a href="https://scholar.google.com/scholar?q={{ query.q | urlencode }}">Google Scholar</a></li>
+ </ul>
+ </div>
+ </div>
+ </div>
+
+ {% endif %}
+
+{% elif es_error %}
+ {{ search_macros.es_error_msg(es_error) }}
+{% endif %}
+
+</div>
+{% endblock %}
+
diff --git a/python/fatcat_web/templates/entity_base.html b/python/fatcat_web/templates/entity_base.html
index c3d6096b..626c102a 100644
--- a/python/fatcat_web/templates/entity_base.html
+++ b/python/fatcat_web/templates/entity_base.html
@@ -75,6 +75,10 @@
{% endif %}
<br>
{% endif %}
+ {% elif entity_type == "container" %}
+ {% if entity.publisher %}
+ <p style="font-size: larger;">{{ entity.publisher }}
+ {% endif %}
{% endif %}
<!-- these nested div make tabs scrollable on mobile -->
<div class="ui container" style="min-width: 100%; overflow-x: auto;">
@@ -82,7 +86,9 @@
<div class="ui small tabular compact menu">
{{ entity_tab("overview", "Overview", "") }}
{% if entity_type == "container" and entity.state == 'active' and not editgroup %}
+ {{ entity_tab("browse", "Browse", "/browse") }}
{{ entity_tab("coverage", "Preservation", "/coverage") }}
+ {{ entity_tab("search", "Search", "/search") }}
{% elif entity_type == "release" and entity.state != 'deleted' %}
{{ entity_tab("contribs", "Authors", "/contribs", entity._authors|count ) }}
{% if entity.state == 'active' %}
diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html
index 9b419c41..5f8f6e0a 100644
--- a/python/fatcat_web/templates/entity_macros.html
+++ b/python/fatcat_web/templates/entity_macros.html
@@ -142,9 +142,9 @@
</form>
{%- endmacro %}
-{% macro release_search_result_row(paper) -%}
+{% macro release_search_result_row(paper, margin_top=True) -%}
<div>
- <h4 style="margin-top: 1em; margin-bottom: 0px; font-size: 1.1em;">
+ <h4 style="{% if margin_top %}margin-top: 1em; {% endif %}margin-bottom: 0px; font-size: 1.1em;">
<a href="/release/{{ paper.ident }}" style="color: #2224c7;">
{% if paper.title %}
{{ paper.title[:512] }}
@@ -224,7 +224,7 @@
{% endif %}
{% if paper.withdrawn_status %}
- <b style="color: red; text-transform: uppercase;">{{ paper.withdrawn_status }}</b>
+ <b style="color: red; text-transform: uppercase;">&nbsp; [{{ paper.withdrawn_status }}]</b>
{% endif %}
{# ### IDENTIFIERS #}
@@ -257,6 +257,9 @@
{% if paper.wikidata_qid %}
<a href="https://wikidata.org/wiki/{{ paper.wikidata_qid }}" style="color: green;">wikidata:{{ paper.wikidata_qid }}</a> &nbsp;
{% endif %}
+ {% if paper.version %}
+ <span style="color: green;">version:{{ paper.version }}</span> &nbsp;
+ {% endif %}
{# WIP: elastic release work grouping searches
<br>
@@ -267,23 +270,73 @@
{% endmacro %}
-{% macro container_search_result_row(entity) -%}
- <div>
- <h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;">
- <a href="/container/{{ entity.ident }}" style="color: #2224c7;">{{ entity['name'] }}</a>
- {% if entity.is_oa %}<i class="icon unlock orange small"></i>{% endif %}
- </h4>
- {% if entity.publisher %}
- <h5 style="margin-top: 4px; margin-bottom: 0px; font-size: 1em;">{{ entity.publisher }}</h5>
- {% endif %}
- {% if entity.issnl %}
- <a href="https://portal.issn.org/resource/ISSN/{{entity.issnl }}" style="color: green;">issn:{{ entity.issnl }}</a>
- {% endif %}
- {% if entity.container_type %}
- &nbsp;{{ entity.container_type }}
- {% endif %}
- {% if entity.publication_status and entity.publication_status != "active" %}
- &nbsp;{{ entity.publication_status }}
+{% macro container_search_result_row(entity, show_stats=False) -%}
+ {# Renders one container search hit: name, optional original name, publisher (linked to a publisher search), identifier links, and index/status badges. When `show_stats` is true a right-hand column shows the approximate release count and a preservation bar. #}
+ <div class="ui stackable doubling grid">
+ <div class="twelve wide column">
+ <h4 style="margin-bottom: 0em; font-size: 1.1em;">
+ <a href="/container/{{ entity.ident }}" style="color: #2224c7;">{{ entity['name'] }}</a>
+ </h4>
+ {% if entity.original_name %}
+ <i>{{ entity.original_name }}</i><br>
+ {% endif %}
+ {% if entity.publisher %}
+ {# publisher names often contain '&' or quotes, so the value is URL-encoded before going into the query string #}
+ <a href="/container/search?q=publisher:%22{{ entity.publisher|urlencode }}%22" style="color: black; margin-top: 0em;">{{ entity.publisher }}</a>
+ <br>
+ {% endif %}
+ {% if entity.issnl %}
+ <a href="https://portal.issn.org/resource/ISSN/{{ entity.issnl }}" style="color: green;">issnl:{{ entity.issnl }}</a> &nbsp;
+ {% endif %}
+ {% if entity.wikidata_qid %}
+ <a href="https://wikidata.org/wiki/{{ entity.wikidata_qid }}" style="color: green;">wikidata:{{ entity.wikidata_qid }}</a> &nbsp;
+ {% endif %}
+ {% if entity.dblp_prefix %}
+ <a href="https://dblp.org/db/{{ entity.dblp_prefix }}/index.html" style="color: green;">dblp:{{ entity.dblp_prefix }}</a> &nbsp;
+ {% endif %}
+ {% if entity.ia_sim_collection %}
+ <a href="https://archive.org/details/{{ entity.ia_sim_collection }}" style="color: green;">[archive.org]</a> &nbsp;
+ {% elif entity.sim_pubid %}
+ <a href="https://archive.org/search.php?query=sim_pubid%3A{{ entity.sim_pubid }}%20mediatype%3Acollection" style="color: green;">[archive.org]</a> &nbsp;
+ {% endif %}
+ {# too much clutter
+ {% if entity.country_code %}
+ <span style="color: black;">country:{{ entity.country_code }}</span> &nbsp;
+ {% endif %}
+ {% for lang in entity.languages %}
+ <span style="color: black;">lang:{{ lang }}</span> &nbsp;
+ {% endfor %}
+ {% if entity.any_kbart %}
+ <span style="color: black;">[KBART]</span> &nbsp;
+ {% endif %}
+ <br>
+ #}
+ {% if entity.in_doaj %}
+ <span style="color: #c54e00;">[DOAJ]</span> &nbsp;
+ {% endif %}
+ {% if entity.in_road %}
+ <span style="color: #c54e00;">[ROAD]</span> &nbsp;
+ {% endif %}
+ {# the generic open-access badge is shown only when neither the DOAJ nor the ROAD badge already signals it #}
+ {% if entity.is_oa and not (entity.in_doaj or entity.in_road) %}
+ <span style="color: #c54e00;">[open-access]</span> &nbsp;
+ {% endif %}
+ {% if entity.container_type and entity.container_type != "journal" %}
+ <b>[{{ entity.container_type }}]</b> &nbsp;
+ {% endif %}
+ {% if entity.publication_status and entity.publication_status != "active" %}
+ <span style="color: brown;">{{ entity.publication_status }}</span> &nbsp;
+ {% endif %}
+ </div>
+
+ {% if show_stats %}
+ <div class="four wide right aligned column">
+ {# the count and the preservation bar are shown together when there is at least one release; an explicit zero is reported as "not indexed" #}
+ {% if entity.releases_total %}
+ <b>~{{ "{:,}".format(entity.releases_total) }}</b> releases
+ {{ preservation_bar({'bright': entity.preservation_bright, 'dark': entity.preservation_dark, 'none': entity.preservation_none, 'total': entity.releases_total}) }}
+ {% elif entity.releases_total == 0 %}
+ <i>(not indexed)</i>
+ {% endif %}
+ </div>
{% endif %}
</div>
{% endmacro %}
@@ -335,31 +388,36 @@ yellow
{% set frac_dark = stats.dark/stats.total %}
{% set frac_none = stats.none/stats.total %}
- <table class="ui very basic very compact collapsing table" style="font-weight: bold; margin-left: 1em;">
+ <table class="ui basic very compact collapsing table" style="margin-left: 1em;">
<tbody>
<tr>
<td style="background-color: #21ba45;">
- <td class="right aligned" >{{ "{:,}".format(stats.bright) }}
+ <td class="right aligned" style="font-weight: bold;">{{ "{:,}".format(stats.bright) }}
<td class="right aligned" >{{ (frac_bright*100)|round(2,method='ceil') }}%
<td>preserved and publicly accessible (bright)
<tr>
<td style="background-color: darkgreen;">
- <td class="right aligned" >{{ "{:,}".format(stats.dark) }}
+ <td class="right aligned" style="font-weight: bold;">{{ "{:,}".format(stats.dark) }}
<td class="right aligned" >{{ (frac_dark*100)|round(2,method='ceil') }}%
<td>preserved but not publicly accessible (dark)
{% if stats.shadows_only %}
{% set frac_shadows_only = stats.shadows_only/stats.total %}
<tr>
<td style="background-color: darkred;">
- <td class="right aligned" >{{ "{:,}".format(stats.shadows_only) }}
+ <td class="right aligned" style="font-weight: bold;">{{ "{:,}".format(stats.shadows_only) }}
<td class="right aligned" >{{ (frac_shadows_only*100)|round(2,method='ceil') }}%
<td>only independently preserved in "shadow" libraries
{% endif %}
<tr>
<td style="background-color: #db2828;">
- <td class="right aligned" >{{ "{:,}".format(stats.none) }}
+ <td class="right aligned" style="font-weight: bold;">{{ "{:,}".format(stats.none) }}
<td class="right aligned" >{{ (frac_none*100)|round(2,method='ceil') }}%
<td>no known independent preservation
+ <tr>
+ <td style="background-color: grey;">
+ <td class="right aligned" style="font-weight: bold;">{{ "{:,}".format(stats.total) }}
+ <td class="right aligned" >
+ <td>total
</tbody>
</table>
diff --git a/python/fatcat_web/templates/entity_view_metadata.html b/python/fatcat_web/templates/entity_view_metadata.html
index d1bd3c30..80aaf19c 100644
--- a/python/fatcat_web/templates/entity_view_metadata.html
+++ b/python/fatcat_web/templates/entity_view_metadata.html
@@ -5,7 +5,15 @@
{% block entity_main %}
<h3>Entity Metadata (schema)</h3>
-{{ entity_macros.extra_metadata(entity._metadata) }}
+{% if entity._metadata %}
+ {{ entity_macros.extra_metadata(entity._metadata) }}
+{% else %}
+ <table class="ui definition single line fixed compact small unstackable table">
+ <tbody>
+ <tr><td class="center aligned"><i>No specific metadata for this entity</i></td>
+ </tbody>
+ </table>
+{% endif %}
<div style="float: right;">
<a href="{{ config.FATCAT_PUBLIC_API_HOST }}/{{ entity_type }}/{{ entity.ident }}">As JSON via API</a>
</div>