aboutsummaryrefslogtreecommitdiffstats
path: root/extra/sitemap/transform_access_url.py
diff options
context:
space:
mode:
Diffstat (limited to 'extra/sitemap/transform_access_url.py')
-rwxr-xr-xextra/sitemap/transform_access_url.py24
1 files changed, 24 insertions, 0 deletions
diff --git a/extra/sitemap/transform_access_url.py b/extra/sitemap/transform_access_url.py
new file mode 100755
index 0000000..b00bd82
--- /dev/null
+++ b/extra/sitemap/transform_access_url.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+import sys
+
+# NOTE: copied from fatcat_scholar/hacks.py
def make_access_redirect_url(access_type: str, access_url: str) -> str:
    """Rewrite an archive access URL to its scholar.archive.org redirect form.

    Two kinds of URL are rewritten:

    * ``access_type == "wayback"`` and the URL contains
      ``://web.archive.org/``: the 5th ``/``-separated segment is taken as the
      capture timestamp and everything after it as the original URL, giving
      ``https://scholar.archive.org/access/wayback/<timestamp>/<original>``.
    * ``access_type == "ia_file"`` and the URL contains
      ``://archive.org/download/``: everything after the ``download/``
      segment is appended to
      ``https://scholar.archive.org/access/ia_file/``.

    Any other combination, and any wayback URL too short to contain a
    timestamp segment, is returned unchanged.

    Args:
        access_type: Kind of access URL (e.g. ``"wayback"`` or ``"ia_file"``).
        access_url: The URL to rewrite.

    Returns:
        The redirect URL, or ``access_url`` itself when no rewrite applies.
    """
    if access_type == "wayback" and "://web.archive.org/" in access_url:
        # Expected layout after splitting on "/":
        #   ["https:", "", "web.archive.org", "web", <timestamp>, <original...>]
        segments = access_url.split("/")
        if len(segments) < 5:
            # No timestamp segment present; indexing would raise IndexError.
            # Fall back to passing the URL through untouched.
            return access_url
        dt = segments[4]
        original_url = "/".join(segments[5:])
        return f"https://scholar.archive.org/access/wayback/{dt}/{original_url}"

    if access_type == "ia_file" and "://archive.org/download/" in access_url:
        # Drop "https:", "", "archive.org", "download"; keep "<item>/<path>".
        suffix = "/".join(access_url.split("/")[4:])
        return f"https://scholar.archive.org/access/ia_file/{suffix}"

    return access_url
+
def run() -> None:
    """Read ``access_type<TAB>access_url`` rows from stdin and print redirects.

    Each non-blank input line is stripped of surrounding whitespace, split on
    tabs into exactly two fields, passed to ``make_access_redirect_url``, and
    the resulting URL is printed on its own line to stdout.

    Blank (or whitespace-only) lines are skipped so that, for example, a
    trailing empty line in the input does not abort the run.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields. The message names the offending line.
    """
    for lineno, line in enumerate(sys.stdin, start=1):
        row = line.strip()
        if not row:
            continue
        fields = row.split("\t")
        if len(fields) != 2:
            raise ValueError(
                f"line {lineno}: expected 2 tab-separated fields, "
                f"got {len(fields)}: {row!r}"
            )
        access_type, access_url = fields
        print(make_access_redirect_url(access_type, access_url))
+
# Only process stdin when executed as a script, so importing this module
# (e.g. to reuse make_access_redirect_url) has no side effects.
if __name__ == "__main__":
    run()