about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- skate/url.go 2
-rw-r--r-- skate/url_test.go 14
2 files changed, 15 insertions, 1 deletion
diff --git a/skate/url.go b/skate/url.go
index ed36b73..3ca4163 100644
--- a/skate/url.go
+++ b/skate/url.go
@@ -8,7 +8,7 @@ var (
patNonWordDomain = regexp.MustCompile(`(https?:\/\/)([^\w]*)(.*)`)
patRepeatedHttpSlashes = regexp.MustCompile(`(https?:\/\/)(\/)*(.*)`)
patHttpDOI = regexp.MustCompile(`(https?:\/\/)(10[.][0-9]{1,8}\/.*)`)
- patAccessedOn = regexp.MustCompile(`(?iU)(.*)[.]?(accessedon|consultado|diaksestanggal|diaksespadatanggal|acesso|accessoem|accessed).*$`)
+ patAccessedOn = regexp.MustCompile(`(?iU)(.*)[.;]?(abgerufen|adresinden|sayfasındanulaşıl|accessedon|consultéle|consultad|diaksestanggal|diaksespadatanggal|lastaccessed|acesso|accessoem|accessed).*$`)
patFileExtraSuffix = regexp.MustCompile(`(http.*[.](zip|pdf|html|doc|docx|rar))(.*)$`)
)
diff --git a/skate/url_test.go b/skate/url_test.go
index 5b3992e..ffa315c 100644
--- a/skate/url_test.go
+++ b/skate/url_test.go
@@ -29,6 +29,20 @@ func TestSanitizeURL(t *testing.T) {
`http://129.3.20.41/eps/hew/papers/0512/0512001.pdf`},
{`http://140.120.197.173/Ecology/Download/Timing-MSChart.zipJournalofInsectScience`,
`http://140.120.197.173/Ecology/Download/Timing-MSChart.zip`},
+ {`141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`,
+ `141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`},
+ {`http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdfRECOVER`,
+ `http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdf`},
+ {`http://2010.census.gov/news/releases/operations/cb11-cn125.html.lastaccessed4`,
+ `http://2010.census.gov/news/releases/operations/cb11-cn125.html`},
+ {`http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.htmladresindenedinilmiştir`,
+ `http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.html`},
+ {`http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdfsayfasındanulaşıl-mıştır`,
+ `http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdf`},
+ {`http://2015.veneziabiennale-japanpavilion.jp/en/Consultadael20deoctubrealas14`,
+ `http://2015.veneziabiennale-japanpavilion.jp/en/`},
+ {`http://-annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`,
+ `http://annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`},
}
for _, c := range cases {
out := SanitizeURL(c.in)