diff options
Diffstat (limited to 'extra')
| -rw-r--r-- | extra/sitemap/README.md | 2 | ||||
| -rwxr-xr-x | extra/sitemap/generate_sitemap_indices.py | 2 | ||||
| -rwxr-xr-x | extra/sitemap/work_urls_query.sh | 2 | 
3 files changed, 1 insertions, 5 deletions
diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md
index 1e1938a..6c03095 100644
--- a/extra/sitemap/README.md
+++ b/extra/sitemap/README.md
@@ -7,8 +7,6 @@ installed. Run these commands on a production machine.
     cd /srv/fatcat_scholar/sitemap
     export DATE=`date --iso-8601`
     /srv/fatcat_scholar/src/extra/sitemap/work_urls_query.sh $DATE
-    rm *.txt.gz
-    gzip sitemap-*.txt
     /srv/fatcat_scholar/src/extra/sitemap/generate_sitemap_indices.py
 
 ## Background
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
index 5b5cad2..f1ec494 100755
--- a/extra/sitemap/generate_sitemap_indices.py
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -10,7 +10,7 @@ def index_entity(entity_type, output):
     print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
     print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)
 
-    for filename in glob.glob(f"sitemap-{entity_type}-*.txt.gz"):
+    for filename in glob.glob(f"sitemap-{entity_type}-*.txt"):
         print("  <sitemap>", file=output)
         print(f"    <loc>https://scholar.archive.org/{filename}</loc>", file=output)
         print(f"    <lastmod>{now}</lastmod>", file=output)
diff --git a/extra/sitemap/work_urls_query.sh b/extra/sitemap/work_urls_query.sh
index 2971f47..98475ff 100755
--- a/extra/sitemap/work_urls_query.sh
+++ b/extra/sitemap/work_urls_query.sh
@@ -16,5 +16,3 @@ fatcat-cli search scholar 'doc_type:work (fulltext.access_type:ia_file OR fullte
     | tr '_' '/' \
     | awk '{print "https://scholar.archive.org/" $1}' \
     | split --lines 20000 - sitemap-works-$DATE- -d -a 5 --additional-suffix .txt
-
-gzip sitemap-works-*.txt
