diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:50:05 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:50:05 +0200 |
commit | ae3f3f3fba70dbf986f3a79a06fef8f09263360b (patch) | |
tree | cfae85ffeccb379259d3eae32a61d965a64430d3 | |
parent | 935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd (diff) | |
download | refcat-ae3f3f3fba70dbf986f3a79a06fef8f09263360b.tar.gz refcat-ae3f3f3fba70dbf986f3a79a06fef8f09263360b.zip |
url: add benchmark
-rw-r--r-- | skate/url.go | 19 | ||||
-rw-r--r-- | skate/url_test.go | 6 |
2 files changed, 15 insertions, 10 deletions
```diff
diff --git a/skate/url.go b/skate/url.go
index 92c9f61..f0edddf 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -19,16 +19,15 @@ var (
 // http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543.
 // http://10.3386/w20634https://doi.org/10.3386/w20634
 func SanitizeURL(s string) string {
-	if len(s) > 14 && strings.Contains(s[14:], "http") {
-		var (
-			index   = suffixarray.New([]byte(s))
-			indices = index.Lookup([]byte("http"), -1)
-		)
-		if len(indices) > 1 {
-			s = s[0:indices[1]] // only use the first
-			s = strings.TrimRight(s, ":")
-			s = strings.TrimRight(s, ";")
-		}
+	var (
+		// seems to only be about 15% of total time spent
+		index   = suffixarray.New([]byte(s))
+		indices = index.Lookup([]byte("http"), -1)
+	)
+	if len(indices) > 1 {
+		s = s[0:indices[1]] // only use the first
+		s = strings.TrimRight(s, ":")
+		s = strings.TrimRight(s, ";")
 	}
 	// http://!!!:
 	// http://!
diff --git a/skate/url_test.go b/skate/url_test.go
index 8dddb09..c138a0e 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -106,3 +106,9 @@ func TestSanitizeURL(t *testing.T) {
 		}
 	}
 }
+
+func BenchmarkSanitizeURL(b *testing.B) {
+	for n := 0; n < b.N; n++ {
+		SanitizeURL(`http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/`)
+	}
+}
```