about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-05-12 21:50:05 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-05-12 21:50:05 +0200
commit: ae3f3f3fba70dbf986f3a79a06fef8f09263360b (patch)
tree: cfae85ffeccb379259d3eae32a61d965a64430d3 /skate
parent: 935e9aab72be5d7aee081ac6249cfdcf2fb9e7cd (diff)
download: refcat-ae3f3f3fba70dbf986f3a79a06fef8f09263360b.tar.gz
download: refcat-ae3f3f3fba70dbf986f3a79a06fef8f09263360b.zip
url: add benchmark
Diffstat (limited to 'skate')
-rw-r--r--  skate/url.go       19
-rw-r--r--  skate/url_test.go   6
2 files changed, 15 insertions, 10 deletions
diff --git a/skate/url.go b/skate/url.go
index 92c9f61..f0edddf 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -19,16 +19,15 @@ var (
// http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543.
// http://10.3386/w20634https://doi.org/10.3386/w20634
func SanitizeURL(s string) string {
- if len(s) > 14 && strings.Contains(s[14:], "http") {
- var (
- index = suffixarray.New([]byte(s))
- indices = index.Lookup([]byte("http"), -1)
- )
- if len(indices) > 1 {
- s = s[0:indices[1]] // only use the first
- s = strings.TrimRight(s, ":")
- s = strings.TrimRight(s, ";")
- }
+ var (
+ // seems to only be about 15% of total time spent
+ index = suffixarray.New([]byte(s))
+ indices = index.Lookup([]byte("http"), -1)
+ )
+ if len(indices) > 1 {
+ s = s[0:indices[1]] // only use the first
+ s = strings.TrimRight(s, ":")
+ s = strings.TrimRight(s, ";")
}
// http://!!!:
// http://!
diff --git a/skate/url_test.go b/skate/url_test.go
index 8dddb09..c138a0e 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -106,3 +106,9 @@ func TestSanitizeURL(t *testing.T) {
}
}
}
+
+func BenchmarkSanitizeURL(b *testing.B) {
+ for n := 0; n < b.N; n++ {
+ SanitizeURL(`http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/`)
+ }
+}