summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-04-27 20:53:24 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-04-27 20:53:26 -0700
commit 9907d45e4f54fe70e8e062f47f75197a3ae1b58e (patch)
tree d5654721d401a9b96e7ce803b0039e0d140b1cb4
parent e35e99bceff3277afaac8f2d5519aa4f07aabe49 (diff)
download fatcat-scholar-9907d45e4f54fe70e8e062f47f75197a3ae1b58e.tar.gz
fatcat-scholar-9907d45e4f54fe70e8e062f47f75197a3ae1b58e.zip
iterate on access redirects and landing page implementation
Small code refactors and minimal test coverage
-rw-r--r-- fatcat_scholar/search.py 11
-rw-r--r-- fatcat_scholar/templates/work.html 38
-rw-r--r-- fatcat_scholar/web.py 1
-rw-r--r-- tests/files/elastic_fulltext_get.json 83
-rw-r--r-- tests/test_web.py 40
5 files changed, 150 insertions, 23 deletions
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index c5fca35..a0cad2c 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -469,14 +469,17 @@ def lookup_fulltext_pdf(sha1: str) -> Optional[dict]:
sha1 = sha1.lower()
assert len(sha1) == 40 and sha1.isalnum()
hits = do_lookup_query(
- f'fulltext.file_sha1:{sha1} fulltext.file_mimetype:"application/pdf"'
+ f'fulltext.file_sha1:{sha1} fulltext.file_mimetype:"application/pdf" fulltext.access_url:*'
)
if not hits.results:
return None
fulltext = ScholarFulltext.parse_obj(hits.results[0]["fulltext"])
if not fulltext.access_type in ("ia_file", "wayback"):
return None
- assert fulltext.file_sha1 == sha1
- assert fulltext.file_mimetype == "application/pdf"
- assert fulltext.access_url
+ if fulltext.file_sha1 != sha1:
+ return None
+ if fulltext.file_mimetype != "application/pdf":
+ return None
+ if not fulltext.access_url:
+ return None
return fulltext
diff --git a/fatcat_scholar/templates/work.html b/fatcat_scholar/templates/work.html
index 92e334e..067d23c 100644
--- a/fatcat_scholar/templates/work.html
+++ b/fatcat_scholar/templates/work.html
@@ -2,39 +2,39 @@
{% extends "base.html" %}
{% block title %}
-{{ doc.title }}
+{{ work.title }}
{% endblock %}
{% block extra_head %}
- <link rel="canonical" href="/work/{{ doc.work_ident }}">
+ <link rel="canonical" href="/work/{{ work.work_ident }}">
- <meta name="citation_title" content="{{ doc.biblio.title }}">
-{% for contrib in doc.biblio.contrib_names %}
+ <meta name="citation_title" content="{{ work.biblio.title }}">
+{% for contrib in work.biblio.contrib_names %}
<meta name="citation_author" content="{{ contrib }}">
{% endfor %}
-{% if doc.biblio.release_date or doc.biblio.release_year %}
- <meta name="citation_publication_date" content="{{ doc.biblio.release_date or doc.biblio.release_year }}">
+{% if work.biblio.release_date or work.biblio.release_year %}
+ <meta name="citation_publication_date" content="{{ work.biblio.release_date or work.biblio.release_year }}">
{% endif %}
-{% if doc.biblio.container_name %}
- <meta name="citation_journal_title" content="{{ doc.biblio.container_name }}">
+{% if work.biblio.container_name %}
+ <meta name="citation_journal_title" content="{{ work.biblio.container_name }}">
{% endif %}
-{% if doc.biblio.volume %}
- <meta name="citation_volume" content="{{ doc.biblio.volume }}">
+{% if work.biblio.volume %}
+ <meta name="citation_volume" content="{{ work.biblio.volume }}">
{% endif %}
-{% if doc.biblio.issue %}
- <meta name="citation_issue" content="{{ doc.biblio.issue }}">
+{% if work.biblio.issue %}
+ <meta name="citation_issue" content="{{ work.biblio.issue }}">
{% endif %}
-{% if doc.biblio.pages %}
- <meta name="citation_first_page" content="{{ doc.biblio.pages }}">
+{% if work.biblio.pages %}
+ <meta name="citation_first_page" content="{{ work.biblio.pages }}">
{% endif %}
-{% if doc.biblio.doi %}
- <meta name="citation_doi" content="{{ doc.biblio.doi }}">
+{% if work.biblio.doi %}
+ <meta name="citation_doi" content="{{ work.biblio.doi }}">
{% endif %}
-{% if doc.fulltext.access_url and doc.biblio.release_ident == doc.fulltext.release_ident and doc.fulltext.access_type in ['wayback', 'ia_file'] and doc.fulltext.file_mimetype == "application/pdf" and doc.fulltext.file_sha1 %}
+{% if work.fulltext.access_url and work.biblio.release_ident == work.fulltext.release_ident and work.fulltext.access_type in ['wayback', 'ia_file'] and work.fulltext.file_mimetype == "application/pdf" and work.fulltext.file_sha1 %}
<!-- PDF access redirect URL, as requested by, eg, scholar.google.com -->
-<meta name="citation_pdf_url" content="/access-redirect/{{ doc.fulltext.file_sha1 }}.pdf">
-<!-- <meta name="citation_pdf_url" content="{{ doc.fulltext.access_url }}"> -->
+<meta name="citation_pdf_url" content="/access-redirect/{{ work.fulltext.file_sha1 }}.pdf">
+<!-- <meta name="citation_pdf_url" content="{{ work.fulltext.access_url }}"> -->
{% endif %}
{% endblock %}
diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py
index 56f2561..adddcbd 100644
--- a/fatcat_scholar/web.py
+++ b/fatcat_scholar/web.py
@@ -380,6 +380,7 @@ def web_work(
"locale": lang.code,
"lang_prefix": lang.prefix,
"doc": doc,
+ "work": doc["_obj"],
},
)
diff --git a/tests/files/elastic_fulltext_get.json b/tests/files/elastic_fulltext_get.json
new file mode 100644
index 0000000..07c53b1
--- /dev/null
+++ b/tests/files/elastic_fulltext_get.json
@@ -0,0 +1,83 @@
+{
+ "_index" : "scholar_fulltext_v01",
+ "_type" : "_doc",
+ "_id" : "work_2x5qvct2dnhrbctqa2q2uyut6a",
+ "_score" : 128.72282,
+ "_source" : {
+ "collapse_key" : "2x5qvct2dnhrbctqa2q2uyut6a",
+ "work_ident" : "2x5qvct2dnhrbctqa2q2uyut6a",
+ "access" : [
+ {
+ "access_type" : "wayback",
+ "access_url" : "https://web.archive.org/web/20200206164725/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf",
+ "file_ident" : "2ya6svbh7be6jcy5w7tuwqod4u",
+ "mimetype" : "application/pdf",
+ "release_ident" : "vtfxc3mibrcpxnv6q5xmr46o5a"
+ }
+ ],
+ "abstracts" : [
+ {
+ "body" : "The U.S. federal government enacted fuel efficiency standards for medium and heavy trucks for the first time in September 2011. Rationales for using this policy tool typically depend upon frictions existing in the marketplace or consumers being myopic, such that vehicle purchasers undervalue the future fuel savings from increased fuel efficiency. We measure by how much long-haul truck owners undervalue future fuel savings by employing recent advances to the classic hedonic approach to estimate the distribution of willingness-to-pay for fuel efficiency. We find significant heterogeneity in truck owners' willingness to pay for fuel efficiency, with the elasticity of fuel efficiency to price ranging from 0.51 at the 10th percentile to 1.33 at the 90th percentile, and an average of 0.91. Combining these results with estimates of future fuel savings from increases in fuel efficiency, we find that long-haul truck owners' willingness-to-pay for a 1 percent increase in fuel efficiency is, on average, just 29.5 percent of the expected future fuel savings. These results suggest that introducing fuel efficiency standards for heavy trucks might be an effective policy tool to raise medium and heavy trucks' fuel economy."
+ }
+ ],
+ "biblio" : {
+ "affiliations" : [ ],
+ "container_ident" : "57xsxj4wvrgofg57esefycoeuu",
+ "container_issnl" : "1936-2854",
+ "container_name" : "Finance and Economics Discussion Series",
+ "contrib_count" : 3,
+ "contrib_names" : [
+ "Jacob Adenbaum",
+ "Adam Copeland",
+ "John J. Stevens"
+ ],
+ "doi" : "10.17016/feds.2015.118",
+ "doi_prefix" : "10.17016",
+ "doi_registrar" : "crossref",
+ "first_page" : "1",
+ "first_page_int" : 1,
+ "issns" : [
+ "1936-2854"
+ ],
+ "issue" : "118",
+ "issue_int" : 118,
+ "pages" : "1-42",
+ "publisher" : "Board of Governors of the Federal Reserve System",
+ "release_ident" : "vtfxc3mibrcpxnv6q5xmr46o5a",
+ "release_stage" : "published",
+ "release_type" : "article-journal",
+ "release_year" : 2015,
+ "title" : "Do long-haul truckers undervalue future fuel savings?",
+ "volume" : "2015",
+ "volume_int" : 2015
+ },
+ "fulltext" : {
+ "file_mimetype" : "application/pdf",
+ "access_type" : "wayback",
+ "file_sha1" : "f81f84e23c9ba5d364c70f01fa26e645d29c0427",
+ "file_ident" : "2ya6svbh7be6jcy5w7tuwqod4u",
+ "access_url" : "https://web.archive.org/web/20200206164725/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf",
+ "release_ident" : "vtfxc3mibrcpxnv6q5xmr46o5a"
+ },
+ "doc_type" : "work",
+ "doc_index_ts" : "2020-08-16T07:01:10.483107",
+ "key" : "work_2x5qvct2dnhrbctqa2q2uyut6a",
+ "releases" : [
+ {
+ "container_ident" : "57xsxj4wvrgofg57esefycoeuu",
+ "container_issnl" : "1936-2854",
+ "container_name" : "Finance and Economics Discussion Series",
+ "doi" : "10.17016/feds.2015.118",
+ "doi_prefix" : "10.17016",
+ "doi_registrar" : "crossref",
+ "ident" : "vtfxc3mibrcpxnv6q5xmr46o5a",
+ "release_stage" : "published",
+ "release_type" : "article-journal",
+ "release_year" : 2015,
+ "revision" : "00193f29-047a-4d00-9037-d7a2f93c3a71",
+ "title" : "Do long-haul truckers undervalue future fuel savings?"
+ }
+ ],
+ "tags" : [ ]
+ }
+}
diff --git a/tests/test_web.py b/tests/test_web.py
index df8b832..6c6632d 100644
--- a/tests/test_web.py
+++ b/tests/test_web.py
@@ -101,3 +101,43 @@ def test_basic_search(client: Any, mocker: Any) -> None:
rv = client.get("/zh/search?q=blood")
assert rv.status_code == 200
+
+def test_basic_work_landing_page(client: Any, mocker: Any) -> None:
+
+ with open("tests/files/elastic_fulltext_get.json") as f:
+ elastic_resp = json.loads(f.read())
+
+ es_raw = mocker.patch(
+ "elasticsearch.connection.Urllib3HttpConnection.perform_request"
+ )
+ es_raw.side_effect = [
+ (200, {}, json.dumps(elastic_resp)),
+ (200, {}, json.dumps(elastic_resp)),
+ ]
+
+ rv = client.get("/work/2x5qvct2dnhrbctqa2q2uyut6a")
+ assert rv.status_code == 200
+ assert b"citation_pdf_url" in rv.content
+
+ rv = client.get("/zh/work/2x5qvct2dnhrbctqa2q2uyut6a")
+ assert rv.status_code == 200
+
+def test_basic_access_redirect(client: Any, mocker: Any) -> None:
+
+ with open("tests/files/elastic_fulltext_search.json") as f:
+ elastic_resp = json.loads(f.read())
+
+ es_raw = mocker.patch(
+ "elasticsearch.connection.Urllib3HttpConnection.perform_request"
+ )
+ es_raw.side_effect = [
+ (200, {}, json.dumps(elastic_resp)),
+ (200, {}, json.dumps(elastic_resp)),
+ ]
+
+ rv = client.get("/access-redirect/f81f84e23c9ba5d364c70f01fa26e645d29c0427.pdf", allow_redirects=False)
+ assert rv.status_code == 302
+ assert rv.headers['Location'] == "https://web.archive.org/web/20200206164725id_/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf"
+
+ rv = client.get("/access-redirect/aaaaaaaaaaaaaaaaaaaaaa01fa26e645d29c0427.pdf", allow_redirects=False)
+ assert rv.status_code == 404