diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:42:15 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:42:15 +0200 |
commit | 935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd (patch) | |
tree | 1e4c3b57b0a609fee6562f430a5c5e1b67fdbe36 | |
parent | ac8ff5599fc2ae33541a16877da006baafd1e341 (diff) | |
download | refcat-935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd.tar.gz refcat-935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd.zip |
sanitize: pre-check before building suffix array
-rw-r--r-- | skate/url.go | 18 |
1 file changed, 10 insertions, 8 deletions
diff --git a/skate/url.go b/skate/url.go index af4ac0c..92c9f61 100644 --- a/skate/url.go +++ b/skate/url.go @@ -19,14 +19,16 @@ var ( // http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543. // http://10.3386/w20634https://doi.org/10.3386/w20634 func SanitizeURL(s string) string { - var ( - index = suffixarray.New([]byte(s)) - indices = index.Lookup([]byte("http"), -1) - ) - if len(indices) > 1 { - s = s[0:indices[1]] // only use the first - s = strings.TrimRight(s, ":") - s = strings.TrimRight(s, ";") + if len(s) > 14 && strings.Contains(s[14:], "http") { + var ( + index = suffixarray.New([]byte(s)) + indices = index.Lookup([]byte("http"), -1) + ) + if len(indices) > 1 { + s = s[0:indices[1]] // only use the first + s = strings.TrimRight(s, ":") + s = strings.TrimRight(s, ";") + } } // http://!!!: // http://! |