diff options
-rw-r--r-- | extra/sitemap/README.md | 2 | ||||
-rwxr-xr-x | extra/sitemap/generate_sitemap_indices.py | 2 | ||||
-rwxr-xr-x | extra/sitemap/work_urls_query.sh | 2 |
3 files changed, 1 insertion, 5 deletions
diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md
index 1e1938a..6c03095 100644
--- a/extra/sitemap/README.md
+++ b/extra/sitemap/README.md
@@ -7,8 +7,6 @@ installed. Run these commands on a production machine.
 cd /srv/fatcat_scholar/sitemap
 export DATE=`date --iso-8601`
 /srv/fatcat_scholar/src/extra/sitemap/work_urls_query.sh $DATE
-rm *.txt.gz
-gzip sitemap-*.txt
 /srv/fatcat_scholar/src/extra/sitemap/generate_sitemap_indices.py
 
 ## Background
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
index 5b5cad2..f1ec494 100755
--- a/extra/sitemap/generate_sitemap_indices.py
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -10,7 +10,7 @@ def index_entity(entity_type, output):
     print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
     print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)
 
-    for filename in glob.glob(f"sitemap-{entity_type}-*.txt.gz"):
+    for filename in glob.glob(f"sitemap-{entity_type}-*.txt"):
         print(" <sitemap>", file=output)
         print(f" <loc>https://scholar.archive.org/{filename}</loc>", file=output)
         print(f" <lastmod>{now}</lastmod>", file=output)
diff --git a/extra/sitemap/work_urls_query.sh b/extra/sitemap/work_urls_query.sh
index 2971f47..98475ff 100755
--- a/extra/sitemap/work_urls_query.sh
+++ b/extra/sitemap/work_urls_query.sh
@@ -16,5 +16,3 @@ fatcat-cli search scholar 'doc_type:work (fulltext.access_type:ia_file OR fullte
     | tr '_' '/' \
     | awk '{print "https://scholar.archive.org/" $1}' \
     | split --lines 20000 - sitemap-works-$DATE- -d -a 5 --additional-suffix .txt
-
-gzip sitemap-works-*.txt