diff options
-rw-r--r-- | skate/url.go | 2 | ||||
-rw-r--r-- | skate/url_test.go | 4 |
2 files changed, 6 insertions, 0 deletions
diff --git a/skate/url.go b/skate/url.go index 539613d..0f7ad7f 100644 --- a/skate/url.go +++ b/skate/url.go @@ -3,6 +3,7 @@ package skate import ( "index/suffixarray" "regexp" + "strings" ) var ( @@ -22,6 +23,7 @@ func SanitizeURL(s string) string { indices := index.Lookup([]byte("http"), -1) if len(indices) > 1 { s = s[0:indices[1]] // only use the first + s = strings.TrimRight(s, ":") } // http://!!!: diff --git a/skate/url_test.go b/skate/url_test.go index 30462d7..76992c1 100644 --- a/skate/url_test.go +++ b/skate/url_test.go @@ -92,6 +92,10 @@ func TestSanitizeURL(t *testing.T) { `http://aem.asm.org/content/67/6/2766.full.pdf+html`}, {`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115https://doi.org/10.5747/ca.2010.v06.n1.a044`, `http://agris.fao.org/agris-search/search.do?recordID=BR2013800115`}, + {`http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell`, + `http://ailab.ist.psu.edu/bcpred/SVMTriP`}, + {`http://aim.bmj.com/content/31/1/23.full.pdf+htmlAcessoem:15Dez`, + `http://aim.bmj.com/content/31/1/23.full.pdf+html`}, } for _, c := range cases { out := SanitizeURL(c.in) |