diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2021-04-30 14:17:12 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2021-04-30 14:17:12 -0700 | 
| commit | c367d54fe47cf71ada73fa9ad16495824e07abfc (patch) | |
| tree | dd07d6ec55b06c85a75f0f7e2663598ca2d57a66 | |
| parent | 0b453b0f74cc88507a7176ec5d749e9bbeb49176 (diff) | |
| download | fatcat-scholar-c367d54fe47cf71ada73fa9ad16495824e07abfc.tar.gz fatcat-scholar-c367d54fe47cf71ada73fa9ad16495824e07abfc.zip | |
sitemaps: not gzip compressed
| -rw-r--r-- | extra/sitemap/README.md | 2 | ||||
| -rwxr-xr-x | extra/sitemap/generate_sitemap_indices.py | 2 | ||||
| -rwxr-xr-x | extra/sitemap/work_urls_query.sh | 2 | 
3 files changed, 1 insertion, 5 deletions
| diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md index 1e1938a..6c03095 100644 --- a/extra/sitemap/README.md +++ b/extra/sitemap/README.md @@ -7,8 +7,6 @@ installed. Run these commands on a production machine.      cd /srv/fatcat_scholar/sitemap      export DATE=`date --iso-8601`      /srv/fatcat_scholar/src/extra/sitemap/work_urls_query.sh $DATE -    rm *.txt.gz -    gzip sitemap-*.txt      /srv/fatcat_scholar/src/extra/sitemap/generate_sitemap_indices.py  ## Background diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py index 5b5cad2..f1ec494 100755 --- a/extra/sitemap/generate_sitemap_indices.py +++ b/extra/sitemap/generate_sitemap_indices.py @@ -10,7 +10,7 @@ def index_entity(entity_type, output):      print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)      print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output) -    for filename in glob.glob(f"sitemap-{entity_type}-*.txt.gz"): +    for filename in glob.glob(f"sitemap-{entity_type}-*.txt"):          print("  <sitemap>", file=output)          print(f"    <loc>https://scholar.archive.org/{filename}</loc>", file=output)          print(f"    <lastmod>{now}</lastmod>", file=output) diff --git a/extra/sitemap/work_urls_query.sh b/extra/sitemap/work_urls_query.sh index 2971f47..98475ff 100755 --- a/extra/sitemap/work_urls_query.sh +++ b/extra/sitemap/work_urls_query.sh @@ -16,5 +16,3 @@ fatcat-cli search scholar 'doc_type:work (fulltext.access_type:ia_file OR fullte      | tr '_' '/' \      | awk '{print "https://scholar.archive.org/" $1}' \      | split --lines 20000 - sitemap-works-$DATE- -d -a 5 --additional-suffix .txt - -gzip sitemap-works-*.txt | 
