diff options
-rw-r--r-- | extra/sitemap/README.md | 11 | ||||
-rwxr-xr-x | extra/sitemap/generate_sitemap_indices.py | 2 | ||||
-rwxr-xr-x | extra/sitemap/release_url_lists.sh | 6 | ||||
-rw-r--r-- | python/fatcat_web/routes.py | 10 |
4 files changed, 20 insertions, 9 deletions
diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md
index 735ac925..f72893cd 100644
--- a/extra/sitemap/README.md
+++ b/extra/sitemap/README.md
@@ -1,4 +1,15 @@
+## HOWTO: Update
+
+After a container dump, as `fatcat` user on prod server:
+
+    cd /srv/fatcat/sitemap
+    export DATE=`date --iso-8601` # or whatever
+    /srv/fatcat/src/extra/sitemap/container_url_lists.sh $DATE /srv/fatcat/snapshots/container_export.json.gz
+    /srv/fatcat/src/extra/sitemap/release_url_lists.sh $DATE /srv/fatcat/snapshots/release_export_expanded.json.gz
+    # delete old sitemap url lists
+    /srv/fatcat/src/extra/sitemap/generate_sitemap_indices.py
+
 ## Background
 Google has a limit of 50k lines / 10 MByte for text sitemap files, and 50K
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
index 9766ac1f..0a5624a1 100755
--- a/extra/sitemap/generate_sitemap_indices.py
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -6,7 +6,7 @@ import datetime
 def index_entity(entity_type, output):
-    now = datetime.datetime.now().isoformat()
+    now = datetime.date.today().isoformat()
     print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
     print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)
diff --git a/extra/sitemap/release_url_lists.sh b/extra/sitemap/release_url_lists.sh
index 4190011f..d5c8d4ef 100755
--- a/extra/sitemap/release_url_lists.sh
+++ b/extra/sitemap/release_url_lists.sh
@@ -6,14 +6,14 @@ set -o pipefail # fail if part of a '|' command fails
 : ${1?' You you did not supply a date argument'}
 : ${2?' You you did not supply an input file (JSON gzip)'}
-if [ -f $2 ] ; then
+if [ ! -f $2 ] ; then
     echo "Input file not found: $2" && exit 1;
 fi
 # eg, 2020-08-19
-DATE = "$1"
+DATE="$1"
 # eg, release_export_expanded.json.gz
-EXPORT_FILE_GZ = "$2"
+EXPORT_FILE_GZ="$2"
 # filter to fulltext releases only, then filter to only one hit per work
 zcat $EXPORT_FILE_GZ \
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index c66c7f0c..9ae2eaa9 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -1158,7 +1158,7 @@ def page_rfc():
 @app.route('/robots.txt', methods=['GET'])
 def page_robots_txt():
-    if conf.FATCAT_DOMAIN == "fatcat.wiki":
+    if app.config['FATCAT_DOMAIN'] == "fatcat.wiki":
         robots_path = "robots.txt"
     else:
         robots_path = "robots.deny_all.txt"
@@ -1168,7 +1168,7 @@ def page_robots_txt():
 @app.route('/sitemap.xml', methods=['GET'])
 def page_sitemap_xml():
-    if conf.FATCAT_DOMAIN == "fatcat.wiki":
-        return redirect('/sitemaps/sitemap.xml')
-    else:
-        abort(404)
+    return send_from_directory(os.path.join(app.root_path, 'static'),
+                               "sitemap.xml",
+                               mimetype='text/xml')
+