diff options
-rw-r--r-- | skate/url.go | 2 | ||||
-rw-r--r-- | skate/url_test.go | 14 |
2 files changed, 15 insertions, 1 deletion
diff --git a/skate/url.go b/skate/url.go index ed36b73..3ca4163 100644 --- a/skate/url.go +++ b/skate/url.go @@ -8,7 +8,7 @@ var ( patNonWordDomain = regexp.MustCompile(`(https?:\/\/)([^\w]*)(.*)`) patRepeatedHttpSlashes = regexp.MustCompile(`(https?:\/\/)(\/)*(.*)`) patHttpDOI = regexp.MustCompile(`(https?:\/\/)(10[.][0-9]{1,8}\/.*)`) - patAccessedOn = regexp.MustCompile(`(?iU)(.*)[.]?(accessedon|consultado|diaksestanggal|diaksespadatanggal|acesso|accessoem|accessed).*$`) + patAccessedOn = regexp.MustCompile(`(?iU)(.*)[.;]?(abgerufen|adresinden|sayfasındanulaşıl|accessedon|consultéle|consultad|diaksestanggal|diaksespadatanggal|lastaccessed|acesso|accessoem|accessed).*$`) patFileExtraSuffix = regexp.MustCompile(`(http.*[.](zip|pdf|html|doc|docx|rar))(.*)$`) ) diff --git a/skate/url_test.go b/skate/url_test.go index 5b3992e..ffa315c 100644 --- a/skate/url_test.go +++ b/skate/url_test.go @@ -29,6 +29,20 @@ func TestSanitizeURL(t *testing.T) { `http://129.3.20.41/eps/hew/papers/0512/0512001.pdf`}, {`http://140.120.197.173/Ecology/Download/Timing-MSChart.zipJournalofInsectScience`, `http://140.120.197.173/Ecology/Download/Timing-MSChart.zip`}, + {`141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`, + `141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`}, + {`http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdfRECOVER`, + `http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdf`}, + {`http://2010.census.gov/news/releases/operations/cb11-cn125.html.lastaccessed4`, + `http://2010.census.gov/news/releases/operations/cb11-cn125.html`}, + {`http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.htmladresindenedinilmiştir`, + `http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.html`}, + {`http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdfsayfasındanulaşıl-mıştır`, + 
`http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdf`}, + {`http://2015.veneziabiennale-japanpavilion.jp/en/Consultadael20deoctubrealas14`, + `http://2015.veneziabiennale-japanpavilion.jp/en/`}, + {`http://-annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`, + `http://annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`}, } for _, c := range cases { out := SanitizeURL(c.in) |