From ff4d9cf06955aaa21a429ff10456468e1fae84fc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 9 Apr 2019 13:07:05 -0700 Subject: have webcapture wayback links go right to capture (not calendar) --- python/fatcat_web/hacks.py | 12 ++++++++++++ python/fatcat_web/routes.py | 13 +++++++------ python/fatcat_web/templates/entity_macros.html | 14 ++++++++++---- python/fatcat_web/templates/release_view.html | 4 ++-- python/fatcat_web/templates/webcapture_view.html | 18 +++++++++--------- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/python/fatcat_web/hacks.py b/python/fatcat_web/hacks.py index 51b97142..9e6f6ab5 100644 --- a/python/fatcat_web/hacks.py +++ b/python/fatcat_web/hacks.py @@ -13,3 +13,15 @@ def test_strip_extlink_xml(): assert strip_extlink_xml("""LOCKSS (2014) Available: http://lockss.org/. Accessed: 2014 November 1.""") == \ """LOCKSS (2014) Available: http://lockss.org/. Accessed: 2014 November 1.""" +def wayback_suffix(entity): + """ + Takes a webcapture entity and returns a suffix to be appended to wayback URLs + """ + ret = "" + if entity.original_url: + if entity.timestamp: + ret = entity.timestamp.strftime("%Y%m%d%H%M%S/") + else: + ret = "*/" + ret += entity.original_url + return ret diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index eb53c2ef..f7f10f44 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -13,7 +13,7 @@ from fatcat_web import app, api, auth_api, priv_api from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth from fatcat_web.cors import crossdomain from fatcat_web.search import * -from fatcat_web.hacks import strip_extlink_xml +from fatcat_web.hacks import strip_extlink_xml, wayback_suffix ### Views ################################################################### @@ -232,6 +232,8 @@ def webcapture_view(ident): entity.releases.append(api.get_release(r)) except ApiException as ae: abort(ae.status) + entity.wayback_suffix = wayback_suffix(entity) + print("SUFFIX: {}".format(entity.wayback_suffix)) return render_template('webcapture_view.html', webcapture=entity) @app.route('/release/lookup', methods=['GET']) @@ -267,8 +269,6 @@ def release_history(ident): def release_view(ident): try: entity = api.get_release(ident, expand="container,files,filesets,webcaptures") - container = entity.container - filesets = entity.filesets except ApiException as ae: abort(ae.status) if entity.state == "redirect": @@ -279,8 +279,10 @@ def release_view(ident): entity.container.es = container_to_elasticsearch(entity.container, force_bool=False) if entity.state == "active": entity.es = release_to_elasticsearch(entity, force_bool=False) - for fs in filesets: + for fs in entity.filesets: fs.total_size = sum([f.size for f in fs.manifest]) + for wc in entity.webcaptures: + wc.wayback_suffix = wayback_suffix(wc) for ref in entity.refs: # this is a UI hack to get rid of XML crud in unstructured refs like: # LOCKSS (2014) Available: /history', methods=['GET']) def work_history(ident): diff --git a/python/fatcat_web/templates/entity_macros.html b/python/fatcat_web/templates/entity_macros.html index 6d9ceed0..813a419e 100644 --- a/python/fatcat_web/templates/entity_macros.html +++ b/python/fatcat_web/templates/entity_macros.html @@ -74,16 +74,22 @@ {%- endmacro %} -{% macro url_list(urls) -%} +{% macro url_list(urls, wayback_suffix="") -%} {% for url in urls %} + {% if url.rel == "wayback" %} + {% set suffix = wayback_suffix %} + {% else %} + {% set suffix = "" %} + {% endif %} + {% set entity = release %}
{{ url.rel }} - + {% if url.url.count('/') >= 3 and url.rel != "dweb" %} - {{ '/'.join(url.url.split('/')[0:2]) }}/{{ ''.join(url.url.split('/')[2]) }}/{{ '/'.join(url.url.split('/')[3:]) }} + {{ '/'.join(url.url.split('/')[0:2]) }}/{{ ''.join(url.url.split('/')[2]) }}/{{ '/'.join(url.url.split('/')[3:]) }}{{ suffix }} {% else %} - {{ url.url }} + {{ url.url }}{{ suffix }} {% endif %} {% endfor %} diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index ff044c49..2a4f2e1b 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -266,7 +266,7 @@
webcapture:{{ webcapture.ident }}
{% for url in webcapture.archive_urls[:5] %} - {{ url.url.split('/')[2] }} ({{ url.rel }})
+ {{ url.url.split('/')[2] }} ({{ url.rel }})
{% endfor %} {% if webcapture.urls|length > 5 %} + {{ file.urls|length - 5 }} more URLs @@ -317,7 +317,7 @@ {% if entity.files != [] and entity.files[0].urls != [] %} Download Full Text {% elif entity.webcaptures != [] and entity.webcaptures[0].archive_urls != [] and entity.webcaptures[0].archive_urls[0].rel == "wayback" %} -View Web Archive +View Web Archive {% else %} No Full Text Available {% endif %} diff --git a/python/fatcat_web/templates/webcapture_view.html b/python/fatcat_web/templates/webcapture_view.html index 921d5d48..ee28cdf9 100644 --- a/python/fatcat_web/templates/webcapture_view.html +++ b/python/fatcat_web/templates/webcapture_view.html @@ -32,6 +32,14 @@ This Web Capture is not associated with any fatcat release. {% endif %} +
+

Archive URLs

+{% if webcapture.archive_urls != None %} + {{ entity_macros.url_list(webcapture.archive_urls, webcapture.wayback_suffix) }} +{% else %} +No known public archive for this webcapture. +{% endif %} +

CDX Rows ({{ webcapture.cdx|count }})

{% if webcapture.cdx %} @@ -56,15 +64,7 @@ {% endfor %} {% else %} -This File Set is empty (contains no files). -{% endif %} - -
-

Archive URLs

-{% if webcapture.archive_urls != None %} - {{ entity_macros.url_list(webcapture.archive_urls) }} -{% else %} -No known public archive for this webcapture. +This web capture is empty (contains no resources). {% endif %} -- cgit v1.2.3