summaryrefslogtreecommitdiffstats
path: root/python/fatcat_web/hacks.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_web/hacks.py')
-rw-r--r--python/fatcat_web/hacks.py27
1 files changed, 27 insertions, 0 deletions
diff --git a/python/fatcat_web/hacks.py b/python/fatcat_web/hacks.py
new file mode 100644
index 00000000..9e6f6ab5
--- /dev/null
+++ b/python/fatcat_web/hacks.py
@@ -0,0 +1,27 @@
+
+import re
+
+# Matches one opening <ext-link ...> tag that ends with xlink:type="simple".
+# "[^>]*" keeps the match inside a single tag; a greedy ".*" would run from
+# the first tag to the last one on the line and swallow the text in between.
+STRIP_EXTLINK_XML_RE = re.compile(r"<ext-link[^>]*xlink:type=\"simple\">")
+
+def strip_extlink_xml(unstr):
+    """
+    Removes <ext-link> markup from a string, keeping the text between the tags.
+
+    Closing tags are removed literally; opening tags are removed with
+    STRIP_EXTLINK_XML_RE. Everything else is returned unchanged.
+    """
+    unstr = unstr.replace("</ext-link>", "")
+    return STRIP_EXTLINK_XML_RE.sub("", unstr)
+
+def test_strip_extlink_xml():
+    """
+    Checks strip_extlink_xml() on plain text and on a single ext-link citation.
+    """
+    # Text without any markup must come back unchanged.
+    plain = "asdf"
+    assert strip_extlink_xml(plain) == plain
+
+    # A citation with one ext-link element keeps only the link text.
+    marked_up = """LOCKSS (2014) Available: <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:href="http://lockss.org/" xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014 November 1."""
+    expected = """LOCKSS (2014) Available: http://lockss.org/. Accessed: 2014 November 1."""
+    assert strip_extlink_xml(marked_up) == expected
+
+def wayback_suffix(entity):
+    """
+    Builds the suffix to append to wayback URLs for a webcapture entity.
+
+    Returns the entity timestamp formatted as YYYYMMDDHHMMSS, a slash, and
+    the original URL. When the entity has no timestamp, "*/" is used in place
+    of the timestamp part. When it has no original URL, returns "".
+    """
+    # Without an original URL there is nothing to build a suffix from.
+    if not entity.original_url:
+        return ""
+    if entity.timestamp:
+        prefix = entity.timestamp.strftime("%Y%m%d%H%M%S/")
+    else:
+        # No timestamp recorded: fall back to the "*/" placeholder.
+        prefix = "*/"
+    return prefix + entity.original_url