diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-06-11 15:13:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-06-11 15:13:17 -0700 |
commit | 388eeaf4fa137522ec90c96e57581ced33205e57 (patch) | |
tree | ea6832c4fe8026373e9c3b712e1b68687d7e8eb6 | |
parent | 3bae05c4a4cd7d6d9b892b952b7ca35454319479 (diff) | |
download | fatcat-scholar-388eeaf4fa137522ec90c96e57581ced33205e57.tar.gz fatcat-scholar-388eeaf4fa137522ec90c96e57581ced33205e57.zip |
update citation_pdf_url HTML meta tag to new access URL style
-rw-r--r-- | fatcat_scholar/hacks.py | 29 | ||||
-rw-r--r-- | fatcat_scholar/templates/work.html | 4 | ||||
-rw-r--r-- | tests/test_web.py | 1 |
3 files changed, 21 insertions, 13 deletions
diff --git a/fatcat_scholar/hacks.py b/fatcat_scholar/hacks.py
index 0f16fc7..e7d4566 100644
--- a/fatcat_scholar/hacks.py
+++ b/fatcat_scholar/hacks.py
@@ -126,15 +126,14 @@ def test_wayback_direct_url() -> None:
     )
 
 
-def make_access_redirect_url(access_type: str, access_url: str) -> str:
+def make_access_redirect_url(work_ident: str, access_type: str, access_url: str) -> str:
     if access_type == "wayback" and "://web.archive.org/" in access_url:
         segments = access_url.split("/")
-        dt = segments[4]
         original_url = "/".join(segments[5:])
-        return f"https://scholar.archive.org/access/wayback/{dt}/{original_url}"
+        return f"https://scholar.archive.org/work/{work_ident}/access/wayback/{original_url}"
     elif access_type == "ia_file" and "://archive.org/download/" in access_url:
         suffix = "/".join(access_url.split("/")[4:])
-        return f"https://scholar.archive.org/access/ia_file/{suffix}"
+        return f"https://scholar.archive.org/work/{work_ident}/access/ia_file/{suffix}"
     else:
         return access_url
 
@@ -142,31 +141,39 @@ def make_access_redirect_url(access_type: str, access_url: str) -> str:
 def test_make_access_redirect_url() -> None:
     assert (
         make_access_redirect_url(
-            "wayback", "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf"
+            "lmobci36t5aelogzjsazuwxpie",
+            "wayback",
+            "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf",
         )
-        == "https://scholar.archive.org/access/wayback/1234/http://fatcat.wiki/thing.pdf"
+        == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf"
     )
     assert (
         make_access_redirect_url(
+            "lmobci36t5aelogzjsazuwxpie",
             "wayback",
             "https://web.archive.org/web/1234/http://fatcat.wiki/thing.pdf?param=asdf",
         )
-        == "https://scholar.archive.org/access/wayback/1234/http://fatcat.wiki/thing.pdf?param=asdf"
+        == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://fatcat.wiki/thing.pdf?param=asdf"
     )
     assert (
         make_access_redirect_url(
-            "ia_file", "https://archive.org/download/something/file.pdf"
+            "lmobci36t5aelogzjsazuwxpie",
+            "ia_file",
+            "https://archive.org/download/something/file.pdf",
         )
-        == "https://scholar.archive.org/access/ia_file/something/file.pdf"
+        == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/ia_file/something/file.pdf"
     )
     assert (
-        make_access_redirect_url("blah", "https://mit.edu/file.pdf")
+        make_access_redirect_url(
+            "lmobci36t5aelogzjsazuwxpie", "blah", "https://mit.edu/file.pdf"
+        )
         == "https://mit.edu/file.pdf"
     )
     assert (
         make_access_redirect_url(
+            "lmobci36t5aelogzjsazuwxpie",
             "wayback",
             "https://web.archive.org/web/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf",
         )
-        == "https://scholar.archive.org/access/wayback/20170811115414/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
+        == "https://scholar.archive.org/work/lmobci36t5aelogzjsazuwxpie/access/wayback/http://sudjms.net/issues/5-4/pdf/8)A%20comparison%20study%20of%20histochemical%20staining%20of%20various%20tissues%20after.pdf"
     )
diff --git a/fatcat_scholar/templates/work.html b/fatcat_scholar/templates/work.html
index 67c87e0..693b1a6 100644
--- a/fatcat_scholar/templates/work.html
+++ b/fatcat_scholar/templates/work.html
@@ -30,9 +30,9 @@
 {% if work.biblio.doi %}
   <meta name="citation_doi" content="{{ work.biblio.doi }}">
 {% endif %}
-{% if work.fulltext.access_url and work.biblio.release_ident == work.fulltext.release_ident and work.fulltext.access_type in ['wayback', 'ia_file'] and work.fulltext.file_mimetype in ["application/pdf", None] and work.fulltext.file_sha1 %}
+{% if work.work_ident and work.fulltext.access_url and work.biblio.release_ident == work.fulltext.release_ident and work.fulltext.access_type in ['wayback', 'ia_file'] and work.fulltext.file_mimetype in ["application/pdf", None] and work.fulltext.file_sha1 %}
   <!-- single PDF access redirect URL -->
-  <meta name="citation_pdf_url" content="{{ make_access_redirect_url(work.fulltext.access_type, work.fulltext.access_url) }}">
+  <meta name="citation_pdf_url" content="{{ make_access_redirect_url(work.work_ident, work.fulltext.access_type, work.fulltext.access_url) }}">
   <!-- direct URL: {{ work.fulltext.access_url | safe }} -->
 {% endif %}
 
diff --git a/tests/test_web.py b/tests/test_web.py
index 7da5880..7f1f72a 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -122,6 +122,7 @@ def test_basic_work_landing_page(client: Any, mocker: Any) -> None:
 
     rv = client.get("/zh/work/2x5qvct2dnhrbctqa2q2uyut6a")
     assert rv.status_code == 200
+    assert b"citation_pdf_url" in rv.content
 
 
 def test_basic_access_redirect(client: Any, mocker: Any) -> None: