diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:10:02 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-12 21:10:02 +0200 |
commit | 506b959daa6a822378570826c670a658a1cf75d4 (patch) | |
tree | b8c1f1903b0298ac5095883f403258ca3dbc9ad1 | |
parent | d28e0ceb6eb1b7e96236d9b311a6d4c9f2fa0f73 (diff) | |
download | refcat-506b959daa6a822378570826c670a658a1cf75d4.tar.gz refcat-506b959daa6a822378570826c670a658a1cf75d4.zip |
url cleanup: more tests
-rw-r--r-- | skate/url.go | 2 | ++ |
-rw-r--r-- | skate/url_test.go | 4 | ++++ |
2 files changed, 6 insertions, 0 deletions
diff --git a/skate/url.go b/skate/url.go
index 539613d..0f7ad7f 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -3,6 +3,7 @@ package skate
 import (
 	"index/suffixarray"
 	"regexp"
+	"strings"
 )
 
 var (
@@ -22,6 +23,7 @@ func SanitizeURL(s string) string {
 	indices := index.Lookup([]byte("http"), -1)
 	if len(indices) > 1 {
 		s = s[0:indices[1]] // only use the first
+		s = strings.TrimRight(s, ":")
 	}
 
 	// http://!!!:
diff --git a/skate/url_test.go b/skate/url_test.go
index 30462d7..76992c1 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -92,6 +92,10 @@ func TestSanitizeURL(t *testing.T) {
 			`http://aem.asm.org/content/67/6/2766.full.pdf+html`},
 		{`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115https://doi.org/10.5747/ca.2010.v06.n1.a044`,
 			`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115`},
+		{`http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell`,
+			`http://ailab.ist.psu.edu/bcpred/SVMTriP`},
+		{`http://aim.bmj.com/content/31/1/23.full.pdf+htmlAcessoem:15Dez`,
+			`http://aim.bmj.com/content/31/1/23.full.pdf+html`},
 	}
 	for _, c := range cases {
 		out := SanitizeURL(c.in)