about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-05-14 01:16:26 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-05-14 01:16:26 +0200
commit 9faa41e6399ac2ab24b6a10088b1ec0157665959 (patch)
tree f82b5736312b8f2dd7c16fee294ea90f0ed791a5
parent 6b8f2f8d448cb3b89d8f7ad147e6a5eb1d3c4da9 (diff)
download refcat-9faa41e6399ac2ab24b6a10088b1ec0157665959.tar.gz
refcat-9faa41e6399ac2ab24b6a10088b1ec0157665959.zip
cleanup: tweaks
-rw-r--r-- skate/url.go 5
-rw-r--r-- skate/url_test.go 4
2 files changed, 8 insertions, 1 deletions
diff --git a/skate/url.go b/skate/url.go
index b37fe4d..d079e8a 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -34,7 +34,10 @@ func SanitizeURL(s string) string {
index = suffixarray.New([]byte(s))
indices = index.Lookup([]byte("http"), -1)
)
- if len(indices) > 1 {
+ if len(indices) == 1 {
+ // ISSN-2177-4129periodicos.ufpel.edu.br/ojs2/index.php/Memoriahttp://dx.doi.org/10.15210/rmr.v8i14.7485
+ s = s[indices[0]:]
+ } else if len(indices) > 1 {
// http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell
s = s[indices[0]:indices[1]] // only use the first
s = strings.TrimRight(s, ":")
diff --git a/skate/url_test.go b/skate/url_test.go
index 8f50162..e1e9419 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -125,6 +125,10 @@ func TestSanitizeURL(t *testing.T) {
{`httpwww.sun.com`, `http://www.sun.com`},
{`httpswww.unos.org`, `http://www.unos.org`},
{`ics.uci.edu/pub/ietf/`, `http://ics.uci.edu/pub/ietf/`},
+ {`ISSN-2177-4129periodicos.ufpel.edu.br/ojs2/index.php/Memoriahttp://dx.doi.org/10.15210/rmr.v8i14.7485`,
+ `http://dx.doi.org/10.15210/rmr.v8i14.7485`},
+ {`Shttp://hdl.handle.net/1765/1163`,
+ `http://hdl.handle.net/1765/1163`},
}
for _, c := range cases {
out := SanitizeURL(c.in)