about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- skate/url.go 2
-rw-r--r-- skate/url_test.go 4
2 files changed, 6 insertions, 0 deletions
diff --git a/skate/url.go b/skate/url.go
index 539613d..0f7ad7f 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -3,6 +3,7 @@ package skate
import (
"index/suffixarray"
"regexp"
+ "strings"
)
var (
@@ -22,6 +23,7 @@ func SanitizeURL(s string) string {
indices := index.Lookup([]byte("http"), -1)
if len(indices) > 1 {
s = s[0:indices[1]] // only use the first
+ s = strings.TrimRight(s, ":")
}
// http://!!!:
diff --git a/skate/url_test.go b/skate/url_test.go
index 30462d7..76992c1 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -92,6 +92,10 @@ func TestSanitizeURL(t *testing.T) {
`http://aem.asm.org/content/67/6/2766.full.pdf+html`},
{`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115https://doi.org/10.5747/ca.2010.v06.n1.a044`,
`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115`},
+ {`http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell`,
+ `http://ailab.ist.psu.edu/bcpred/SVMTriP`},
+ {`http://aim.bmj.com/content/31/1/23.full.pdf+htmlAcessoem:15Dez`,
+ `http://aim.bmj.com/content/31/1/23.full.pdf+html`},
}
for _, c := range cases {
out := SanitizeURL(c.in)