diff options
-rw-r--r-- | skate/url.go | 19 | ||||
-rw-r--r-- | skate/url_test.go | 6 |
2 files changed, 15 insertions, 10 deletions
diff --git a/skate/url.go b/skate/url.go index 92c9f61..f0edddf 100644 --- a/skate/url.go +++ b/skate/url.go @@ -19,16 +19,15 @@ var ( // http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543. // http://10.3386/w20634https://doi.org/10.3386/w20634 func SanitizeURL(s string) string { - if len(s) > 14 && strings.Contains(s[14:], "http") { - var ( - index = suffixarray.New([]byte(s)) - indices = index.Lookup([]byte("http"), -1) - ) - if len(indices) > 1 { - s = s[0:indices[1]] // only use the first - s = strings.TrimRight(s, ":") - s = strings.TrimRight(s, ";") - } + var ( + // seems to only be about 15% of total time spent + index = suffixarray.New([]byte(s)) + indices = index.Lookup([]byte("http"), -1) + ) + if len(indices) > 1 { + s = s[0:indices[1]] // only use the first + s = strings.TrimRight(s, ":") + s = strings.TrimRight(s, ";") } // http://!!!: // http://! diff --git a/skate/url_test.go b/skate/url_test.go index 8dddb09..c138a0e 100644 --- a/skate/url_test.go +++ b/skate/url_test.go @@ -106,3 +106,9 @@ func TestSanitizeURL(t *testing.T) { } } } + +func BenchmarkSanitizeURL(b *testing.B) { + for n := 0; n < b.N; n++ { + SanitizeURL(`http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/`) + } +} |