about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-04-30 14:18:18 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-04-30 14:18:18 -0700
commit: 2e75c11c093a690297c323e869e2edb9e53b3564 (patch)
tree: d199c0b00b714357ec41d5d637ca43d4a2c4ceb6
parent: c367d54fe47cf71ada73fa9ad16495824e07abfc (diff)
download: fatcat-scholar-2e75c11c093a690297c323e869e2edb9e53b3564.tar.gz
download: fatcat-scholar-2e75c11c093a690297c323e869e2edb9e53b3564.zip
web: use absolute URLs to sitemaps and in citation_pdf_url
-rw-r--r--  fatcat_scholar/static/robots.allow.txt  4
-rw-r--r--  fatcat_scholar/templates/work.html  7
-rw-r--r--  proposals/2021-04-28_indexability.md  6
3 files changed, 10 insertions, 7 deletions
diff --git a/fatcat_scholar/static/robots.allow.txt b/fatcat_scholar/static/robots.allow.txt
index 35f13a3..fb0c251 100644
--- a/fatcat_scholar/static/robots.allow.txt
+++ b/fatcat_scholar/static/robots.allow.txt
@@ -13,5 +13,5 @@ User-agent: *
Allow: /search
Crawl-delay: 5
-Sitemap: /sitemap.xml
-Sitemap: /sitemap-index-works.xml
+Sitemap: https://scholar.archive.org/sitemap.xml
+Sitemap: https://scholar.archive.org/sitemap-index-works.xml
diff --git a/fatcat_scholar/templates/work.html b/fatcat_scholar/templates/work.html
index 067d23c..8829ec2 100644
--- a/fatcat_scholar/templates/work.html
+++ b/fatcat_scholar/templates/work.html
@@ -18,7 +18,6 @@
{% if work.biblio.container_name %}
<meta name="citation_journal_title" content="{{ work.biblio.container_name }}">
{% endif %}
-
{% if work.biblio.volume %}
<meta name="citation_volume" content="{{ work.biblio.volume }}">
{% endif %}
@@ -32,9 +31,9 @@
<meta name="citation_doi" content="{{ work.biblio.doi }}">
{% endif %}
{% if work.fulltext.access_url and work.biblio.release_ident == work.fulltext.release_ident and work.fulltext.access_type in ['wayback', 'ia_file'] and work.fulltext.file_mimetype == "application/pdf" and work.fulltext.file_sha1 %}
-<!-- PDF access redirect URL, as requested by, eg, scholar.google.com -->
-<meta name="citation_pdf_url" content="/access-redirect/{{ work.fulltext.file_sha1 }}.pdf">
-<!-- <meta name="citation_pdf_url" content="{{ work.fulltext.access_url }}"> -->
+ <!-- PDF access redirect URL, as requested by, eg, scholar.google.com -->
+ <meta name="citation_pdf_url" content="https://scholar.archive.org/access-redirect/{{ work.fulltext.file_sha1 }}.pdf">
+ <!-- Multiple URLs allowed? <meta name="citation_pdf_url" content="{{ work.fulltext.access_url }}"> -->
{% endif %}
{% endblock %}
diff --git a/proposals/2021-04-28_indexability.md b/proposals/2021-04-28_indexability.md
index cfa928f..a58d23d 100644
--- a/proposals/2021-04-28_indexability.md
+++ b/proposals/2021-04-28_indexability.md
@@ -46,6 +46,8 @@ release will be linked, not earlier pre-print or accepted manuscript versions.
This behavior may change at some point to include "green" access links from the
"work" landing page.
+The `citation_pdf_url` tag should contain an absolute URL, not a relative URL.
+
Alternatively, we could have landing pages only for "releases" (versions), like
already exist on fatcat.wiki. This would make the decision about which files to
link to simpler.
@@ -101,6 +103,8 @@ will include:
/robots.txt - updated to include sitemap references
/sitemap.xml - basic generic list of pages (homepage, about, userguide)
/sitemap-index-works.xml - XML file pointing to many sub-sitemap files; includes lastmod metadata
- /sitemap-works-YYYY-MM-DD-NNNNN.txt.gz - series of timestamped "simple" sitemaps (URL list files)
+ /sitemap-works-YYYY-MM-DD-NNNNN.txt - series of timestamped "simple" sitemaps (URL list files)
Only works for which there is an appropriate fulltext access URL
+
+The sitemap links from robots.txt should be absolute URLs, not relative URLs.