about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-04-30 14:17:12 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-04-30 14:17:12 -0700
commit: c367d54fe47cf71ada73fa9ad16495824e07abfc (patch)
tree: dd07d6ec55b06c85a75f0f7e2663598ca2d57a66
parent: 0b453b0f74cc88507a7176ec5d749e9bbeb49176 (diff)
download: fatcat-scholar-c367d54fe47cf71ada73fa9ad16495824e07abfc.tar.gz
download: fatcat-scholar-c367d54fe47cf71ada73fa9ad16495824e07abfc.zip
sitemaps: not gzip compressed
-rw-r--r--  extra/sitemap/README.md                    2
-rwxr-xr-x  extra/sitemap/generate_sitemap_indices.py  2
-rwxr-xr-x  extra/sitemap/work_urls_query.sh           2
3 files changed, 1 insertion, 5 deletions
diff --git a/extra/sitemap/README.md b/extra/sitemap/README.md
index 1e1938a..6c03095 100644
--- a/extra/sitemap/README.md
+++ b/extra/sitemap/README.md
@@ -7,8 +7,6 @@ installed. Run these commands on a production machine.
cd /srv/fatcat_scholar/sitemap
export DATE=`date --iso-8601`
/srv/fatcat_scholar/src/extra/sitemap/work_urls_query.sh $DATE
- rm *.txt.gz
- gzip sitemap-*.txt
/srv/fatcat_scholar/src/extra/sitemap/generate_sitemap_indices.py
## Background
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
index 5b5cad2..f1ec494 100755
--- a/extra/sitemap/generate_sitemap_indices.py
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -10,7 +10,7 @@ def index_entity(entity_type, output):
print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)
- for filename in glob.glob(f"sitemap-{entity_type}-*.txt.gz"):
+ for filename in glob.glob(f"sitemap-{entity_type}-*.txt"):
print(" <sitemap>", file=output)
print(f" <loc>https://scholar.archive.org/{filename}</loc>", file=output)
print(f" <lastmod>{now}</lastmod>", file=output)
diff --git a/extra/sitemap/work_urls_query.sh b/extra/sitemap/work_urls_query.sh
index 2971f47..98475ff 100755
--- a/extra/sitemap/work_urls_query.sh
+++ b/extra/sitemap/work_urls_query.sh
@@ -16,5 +16,3 @@ fatcat-cli search scholar 'doc_type:work (fulltext.access_type:ia_file OR fullte
| tr '_' '/' \
| awk '{print "https://scholar.archive.org/" $1}' \
| split --lines 20000 - sitemap-works-$DATE- -d -a 5 --additional-suffix .txt
-
-gzip sitemap-works-*.txt