about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-08-19 23:57:31 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-08-19 23:57:31 -0700
commit: c15cbf3568f7d91774e1cb82a39474c0ff874616 (patch)
tree: be5dffb5f735ab497c9affe155fd82267ed4b412
parent: c6b33542398c933a6272586e6280f7026b63a124 (diff)
download: fatcat-c15cbf3568f7d91774e1cb82a39474c0ff874616.tar.gz
download: fatcat-c15cbf3568f7d91774e1cb82a39474c0ff874616.zip
sitemap fixes from testing
-rw-r--r-- extra/sitemap/README.md 11
-rwxr-xr-x extra/sitemap/generate_sitemap_indices.py 2
-rwxr-xr-x extra/sitemap/release_url_lists.sh 6
-rw-r--r-- python/fatcat_web/routes.py 10
4 files changed, 20 insertions, 9 deletions
diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md
index 735ac925..f72893cd 100644
--- a/extra/sitemap/README.md
+++ b/extra/sitemap/README.md
@@ -1,4 +1,15 @@
+## HOWTO: Update
+
+After a container dump, as `fatcat` user on prod server:
+
+ cd /srv/fatcat/sitemap
+ export DATE=`date --iso-8601` # or whatever
+ /srv/fatcat/src/extra/sitemap/container_url_lists.sh $DATE /srv/fatcat/snapshots/container_export.json.gz
+ /srv/fatcat/src/extra/sitemap/release_url_lists.sh $DATE /srv/fatcat/snapshots/release_export_expanded.json.gz
+ # delete old sitemap url lists
+ /srv/fatcat/src/extra/sitemap/generate_sitemap_indices.py
+
## Background
Google has a limit of 50k lines / 10 MByte for text sitemap files, and 50K
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
index 9766ac1f..0a5624a1 100755
--- a/extra/sitemap/generate_sitemap_indices.py
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -6,7 +6,7 @@ import datetime
def index_entity(entity_type, output):
- now = datetime.datetime.now().isoformat()
+ now = datetime.date.today().isoformat()
print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)
diff --git a/extra/sitemap/release_url_lists.sh b/extra/sitemap/release_url_lists.sh
index 4190011f..d5c8d4ef 100755
--- a/extra/sitemap/release_url_lists.sh
+++ b/extra/sitemap/release_url_lists.sh
@@ -6,14 +6,14 @@ set -o pipefail # fail if part of a '|' command fails
: ${1?' You you did not supply a date argument'}
: ${2?' You you did not supply an input file (JSON gzip)'}
-if [ -f $2 ] ; then
+if [ ! -f $2 ] ; then
echo "Input file not found: $2" && exit 1;
fi
# eg, 2020-08-19
-DATE = "$1"
+DATE="$1"
# eg, release_export_expanded.json.gz
-EXPORT_FILE_GZ = "$2"
+EXPORT_FILE_GZ="$2"
# filter to fulltext releases only, then filter to only one hit per work
zcat $EXPORT_FILE_GZ \
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index c66c7f0c..9ae2eaa9 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -1158,7 +1158,7 @@ def page_rfc():
@app.route('/robots.txt', methods=['GET'])
def page_robots_txt():
- if conf.FATCAT_DOMAIN == "fatcat.wiki":
+ if app.config['FATCAT_DOMAIN'] == "fatcat.wiki":
robots_path = "robots.txt"
else:
robots_path = "robots.deny_all.txt"
@@ -1168,7 +1168,7 @@ def page_robots_txt():
@app.route('/sitemap.xml', methods=['GET'])
def page_sitemap_xml():
- if conf.FATCAT_DOMAIN == "fatcat.wiki":
- return redirect('/sitemaps/sitemap.xml')
- else:
- abort(404)
+ return send_from_directory(os.path.join(app.root_path, 'static'),
+ "sitemap.xml",
+ mimetype='text/xml')
+