about | summary | refs | log | tree | commit | diff | stats
path: root/skate/url.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/url.go')
-rw-r--r-- skate/url.go 13
1 files changed, 5 insertions, 8 deletions
diff --git a/skate/url.go b/skate/url.go
index 0f7ad7f..af4ac0c 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -19,24 +19,23 @@ var (
// http://agingmind.utdallas.edu/facedb/view/neutral-faces.doi:10.3758/BF03206543.
// http://10.3386/w20634https://doi.org/10.3386/w20634
func SanitizeURL(s string) string {
- index := suffixarray.New([]byte(s))
- indices := index.Lookup([]byte("http"), -1)
+ var (
+ index = suffixarray.New([]byte(s))
+ indices = index.Lookup([]byte("http"), -1)
+ )
if len(indices) > 1 {
s = s[0:indices[1]] // only use the first
s = strings.TrimRight(s, ":")
+ s = strings.TrimRight(s, ";")
}
-
// http://!!!:
// http://!
// http://"
s = patNonWordDomain.ReplaceAllString(s, `$1$3`)
-
// http:///en.m.wikipedia.org/ChenLong
s = patRepeatedHttpSlashes.ReplaceAllString(s, `$1$3`)
-
// http://10.1113/jphysiol.2002.026047
s = patHttpDOI.ReplaceAllString(s, `https://doi.org/$2`)
-
// .diaksestanggal27-03-2017.10.30Wib
// accessedon15
// .Accessed
@@ -45,11 +44,9 @@ func SanitizeURL(s string) string {
if patAccessedOn.MatchString(s) {
s = patAccessedOn.ReplaceAllString(s, `$1`)
}
-
// http://140.120.197.173/Ecology/Download/Timing-MSChart.zipJournalofInsectScience
if patFileExtraSuffix.MatchString(s) {
s = patFileExtraSuffix.ReplaceAllString(s, `$1`)
}
-
return s
}