From 016d6d28c24f616897bdb7587205cfe2cc32ec89 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 14 Feb 2020 00:12:23 -0800 Subject: remove arabesque short wayback URL hack --- python/fatcat_tools/importers/shadow.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index 1a76299e..4cd22775 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -155,12 +155,6 @@ class ShadowLibraryImporter(EntityImporter): if u.rel == 'social': u.rel = 'academicsocial' - # new wayback URLs, could replace bad old short wayback URLs (from arabesque bug) - new_wb_urls = [u.url for u in fe.urls] - new_short_wb_urls = ['https://web.archive.org/web/{}/{}'.format( - u.split('/')[4][:12], '/'.join(u.split('/')[5:])) for u in new_wb_urls] - existing.urls = [u for u in existing.urls if not u.url in new_short_wb_urls] - # merge the existing into this one and update merged_urls = {} for u in fe.urls + existing.urls: -- cgit v1.2.3