From 935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 12 May 2021 21:42:15 +0200 Subject: sanitize: pre check before building suffix array --- skate/url.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/skate/url.go b/skate/url.go index af4ac0c..92c9f61 100644 --- a/skate/url.go +++ b/skate/url.go @@ -19,14 +19,16 @@ var ( // http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543. // http://10.3386/w20634https://doi.org/10.3386/w20634 func SanitizeURL(s string) string { - var ( - index = suffixarray.New([]byte(s)) - indices = index.Lookup([]byte("http"), -1) - ) - if len(indices) > 1 { - s = s[0:indices[1]] // only use the first - s = strings.TrimRight(s, ":") - s = strings.TrimRight(s, ";") + if len(s) > 14 && strings.Contains(s[14:], "http") { + var ( + index = suffixarray.New([]byte(s)) + indices = index.Lookup([]byte("http"), -1) + ) + if len(indices) > 1 { + s = s[0:indices[1]] // only use the first + s = strings.TrimRight(s, ":") + s = strings.TrimRight(s, ";") + } } // http://!!!: // http://! -- cgit v1.2.3