From 71e5662365892d32a5f92e2733b7ae804c833f57 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 4 Jun 2020 13:59:57 -0700 Subject: re-use 'best pdf url' for release green button I thought this was the existing behavior, but it looks like we were just taking the first link from the first file. In the future may refactor this out even further. --- python/fatcat_web/templates/release_view.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 60e4624e..2aba47ba 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -253,8 +253,8 @@ accessible version.
-{% if entity.state == 'active' and entity.files != [] and entity.files[0].urls != [] %} -Read Full Text +{% if entity.state == 'active' and entity._es and entity._es.best_pdf_url %} +Read Full Text {% elif entity.state == 'active' and entity.webcaptures != [] and entity.webcaptures[0].archive_urls != [] and entity.webcaptures[0].archive_urls[0].rel == "wayback" %} View Web Archive {% elif entity.state == 'active' %} -- cgit v1.2.3 From a42d5f0d00e76bf8474647fae4e1d9d61693a7d9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 4 Jun 2020 14:01:34 -0700 Subject: ES schema: add best_url to file schema This will increase index size (URLs are often long in our corpus, and we have many file entities), but seems worth it. Initially added `ia_url` as a second field, guaranteed to always be an *.archive.org URL, but `best_url` defaults to that anyways so didn't seem worthwhile. --- extra/elasticsearch/file_schema.json | 1 + python/fatcat_tools/transforms/elasticsearch.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'python') diff --git a/extra/elasticsearch/file_schema.json b/extra/elasticsearch/file_schema.json index 9c8ee64c..0fa25c3a 100644 --- a/extra/elasticsearch/file_schema.json +++ b/extra/elasticsearch/file_schema.json @@ -44,6 +44,7 @@ "rels": { "type": "keyword", "normalizer": "default" }, "in_ia": { "type": "boolean" }, "in_ia_petabox": { "type": "boolean" }, + "best_url": { "type": "keyword", "normalizer": "default" }, "release_id": { "type": "alias", "path": "release_ids" }, "sha1hex": { "type": "alias", "path": "sha1" }, diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 87e054ec..1d35141b 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -508,4 +508,16 @@ def file_to_elasticsearch(entity): t['in_ia'] = bool('archive.org' in t['domains']) t['in_ia_petabox'] = bool('archive.org' in t['hosts']) + any_url = None + good_url = None + best_url = None + for release_url in (entity.urls or []): + any_url = release_url.url + if release_url.rel in ('webarchive', 'repository'): + good_url = release_url.url + if '//web.archive.org/' in release_url.url or '//archive.org/' in release_url.url: + best_url = release_url.url + # here is where we bake-in priority; IA-specific + t['best_url'] = best_url or good_url or any_url + return t -- cgit v1.2.3 From 2f233a3b8c00385d4b215361a0fa09f93a05f8d9 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 4 Jun 2020 14:05:56 -0700 Subject: use ES 'best_url' in file download pages Similar to recent change for release download pages. --- python/fatcat_web/entity_helpers.py | 2 ++ python/fatcat_web/templates/file_view.html | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/entity_helpers.py b/python/fatcat_web/entity_helpers.py index d82ea0e9..e3d538e0 100644 --- a/python/fatcat_web/entity_helpers.py +++ b/python/fatcat_web/entity_helpers.py @@ -30,6 +30,8 @@ def enrich_creator_entity(entity): return entity def enrich_file_entity(entity): + if entity.state == "active": + entity._es = file_to_elasticsearch(entity) return entity def enrich_fileset_entity(entity): diff --git a/python/fatcat_web/templates/file_view.html b/python/fatcat_web/templates/file_view.html index d60ea49d..02f47a91 100644 --- a/python/fatcat_web/templates/file_view.html +++ b/python/fatcat_web/templates/file_view.html @@ -44,8 +44,8 @@ No known public URL, mirror, or archive for this file.
-{% if file.urls != None and file.urls != [] %} -Download File +{% if file._es and file._es.best_url %} +Download File {% else %} No Download Available {% endif %} -- cgit v1.2.3