summaryrefslogtreecommitdiffstats
path: root/extra/sitemap/generate_sitemap_indices.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-04-29 10:03:47 -0700
committerBryan Newbold <bnewbold@archive.org>2021-04-29 10:03:47 -0700
commit6b1f87c12f7d40a3016910b214579a368c747df4 (patch)
treed52e555663a3fe395fe0024098735adaf8e10494 /extra/sitemap/generate_sitemap_indices.py
parent4b152e02d1a0d0d7a9a391ed211ecd6f304d6962 (diff)
downloadfatcat-scholar-6b1f87c12f7d40a3016910b214579a368c747df4.tar.gz
fatcat-scholar-6b1f87c12f7d40a3016910b214579a368c747df4.zip
sitemap generation
Diffstat (limited to 'extra/sitemap/generate_sitemap_indices.py')
-rwxr-xr-xextra/sitemap/generate_sitemap_indices.py26
1 files changed, 26 insertions, 0 deletions
diff --git a/extra/sitemap/generate_sitemap_indices.py b/extra/sitemap/generate_sitemap_indices.py
new file mode 100755
index 0000000..5b5cad2
--- /dev/null
+++ b/extra/sitemap/generate_sitemap_indices.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+import sys
+import glob
+import datetime
+
def index_entity(entity_type, output):
    """Write a sitemap index XML document for one entity type to *output*.

    Every file in the current working directory whose name matches
    ``sitemap-{entity_type}-*.txt.gz`` gets one ``<sitemap>`` entry. The
    entry's ``<loc>`` is that filename under ``https://scholar.archive.org/``
    and its ``<lastmod>`` is today's date in ISO format.

    Args:
        entity_type: Name fragment inserted into the glob pattern
            (``main()`` passes ``"works"``).
        output: Writable text file object that receives the XML.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    now = datetime.date.today().isoformat()
    print("""<?xml version="1.0" encoding="UTF-8"?>""", file=output)
    print("""<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">""", file=output)

    # glob.glob() yields matches in arbitrary order; sorting makes repeated
    # runs over the same set of files produce an identical index.
    for filename in sorted(glob.glob(f"sitemap-{entity_type}-*.txt.gz")):
        # Percent-encode the name so characters such as "&", "<" or spaces
        # can neither break the XML nor yield an invalid URL. Ordinary
        # names (letters, digits, "-", ".", "_") pass through unchanged.
        location = quote(filename)
        print(" <sitemap>", file=output)
        print(f" <loc>https://scholar.archive.org/{location}</loc>", file=output)
        print(f" <lastmod>{now}</lastmod>", file=output)
        print(" </sitemap>", file=output)

    print("</sitemapindex>", file=output)
+
def main():
    """Generate ``sitemap-index-works.xml`` in the current directory.

    The file is created (or overwritten) and filled by ``index_entity``
    for the ``"works"`` entity type.
    """
    # The XML declaration written into this file says encoding="UTF-8", so
    # open it as UTF-8 explicitly instead of using the platform default.
    with open('sitemap-index-works.xml', 'w', encoding='utf-8') as output:
        index_entity("works", output)
+
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()