diff options
-rw-r--r-- | skate/url.go | 5 | ||||
-rw-r--r-- | skate/url_test.go | 4 |
2 files changed, 8 insertions, 1 deletion
diff --git a/skate/url.go b/skate/url.go index b37fe4d..d079e8a 100644 --- a/skate/url.go +++ b/skate/url.go @@ -34,7 +34,10 @@ func SanitizeURL(s string) string { index = suffixarray.New([]byte(s)) indices = index.Lookup([]byte("http"), -1) ) - if len(indices) > 1 { + if len(indices) == 1 { + // ISSN-2177-4129periodicos.ufpel.edu.br/ojs2/index.php/Memoriahttp://dx.doi.org/10.15210/rmr.v8i14.7485 + s = s[indices[0]:] + } else if len(indices) > 1 { // http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell s = s[indices[0]:indices[1]] // only use the first s = strings.TrimRight(s, ":") diff --git a/skate/url_test.go b/skate/url_test.go index 8f50162..e1e9419 100644 --- a/skate/url_test.go +++ b/skate/url_test.go @@ -125,6 +125,10 @@ func TestSanitizeURL(t *testing.T) { {`httpwww.sun.com`, `http://www.sun.com`}, {`httpswww.unos.org`, `http://www.unos.org`}, {`ics.uci.edu/pub/ietf/`, `http://ics.uci.edu/pub/ietf/`}, + {`ISSN-2177-4129periodicos.ufpel.edu.br/ojs2/index.php/Memoriahttp://dx.doi.org/10.15210/rmr.v8i14.7485`, + `http://dx.doi.org/10.15210/rmr.v8i14.7485`}, + {`Shttp://hdl.handle.net/1765/1163`, + `http://hdl.handle.net/1765/1163`}, } for _, c := range cases { out := SanitizeURL(c.in) |