import re

# Matches an opening (or self-closing) JATS ``<ext-link ...>`` tag together
# with all of its attributes. ``[^>]*`` keeps the match inside a single tag, so
# several links on one line are each stripped separately.
# NOTE(review): the pattern and the closing-tag literal below were empty
# strings in the reviewed source (which made the helper a no-op); they appear
# to have been lost to markup stripping. Confirm against upstream history.
STRIP_EXTLINK_XML_RE = re.compile(r"<ext-link\b[^>]*>")


def strip_extlink_xml(unstr: str) -> str:
    """Remove ``<ext-link>`` markup from a string, keeping the enclosed text.

    Both the opening tag (with any attributes) and the closing
    ``</ext-link>`` tag are dropped; everything else is returned unchanged.

    Args:
        unstr: Text that may contain ``<ext-link ...>...</ext-link>`` markup.

    Returns:
        The text with the ext-link tags removed.
    """
    unstr = unstr.replace("</ext-link>", "")
    unstr = STRIP_EXTLINK_XML_RE.sub("", unstr)
    return unstr


def test_strip_extlink_xml():
    """Plain text passes through; ext-link tags are stripped around a URL."""
    assert strip_extlink_xml("asdf") == "asdf"
    assert strip_extlink_xml(
        """LOCKSS (2014) Available: <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:href="http://lockss.org/" xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014 November 1."""
    ) == """LOCKSS (2014) Available: http://lockss.org/. Accessed: 2014 November 1."""


def wayback_suffix(entity) -> str:
    """Build the suffix to append to a wayback URL for a webcapture entity.

    Args:
        entity: Object exposing ``original_url`` and ``timestamp`` attributes;
            ``timestamp`` must provide ``strftime`` when it is truthy.

    Returns:
        ``""`` when ``original_url`` is falsy; otherwise the timestamp
        formatted as ``%Y%m%d%H%M%S/`` (or ``*/`` when there is no timestamp)
        followed by ``original_url``.
    """
    if not entity.original_url:
        return ""
    if entity.timestamp:
        prefix = entity.timestamp.strftime("%Y%m%d%H%M%S/")
    else:
        # No capture time recorded: fall back to the "*" placeholder.
        prefix = "*/"
    return prefix + entity.original_url