1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
package skate
import "testing"
// TestSanitizeURL runs SanitizeURL over a table of raw strings and the
// cleaned URL expected for each of them. The table covers, among others:
//
//   - inputs without a usable host ("a", "???", "12s"), expected to yield "";
//   - stray punctuation right after the scheme ("http://!!abc.com");
//   - hosts that look like a DOI prefix ("http://10.1111/..."), expected to be
//     rewritten to https://doi.org/..., next to dotted-quad addresses such as
//     "http://10.30.3.16/..." that are expected to stay unchanged;
//   - citation text fused onto the end of a URL (".Accessed", "Acessoem:", ...);
//   - schemeless or mangled-scheme inputs ("httpwww.sun.com",
//     "cdec.water.ca.gov/..."), expected to come back with an "http://" prefix;
//   - several URLs run together, or junk in front of the scheme.
//
// Every mismatch is reported together with the offending input, and the loop
// keeps going so that a single run lists all failing rows.
func TestSanitizeURL(t *testing.T) {
	var cases = []struct {
		in  string // raw string handed to SanitizeURL
		out string // expected return value
	}{
		{"", ""},
		{"a", ""},
		{"???", ""},
		{"???***", ""},
		{"???***___123", ""},
		{"http://abc.com", "http://abc.com"},
		{"http://!!abc.com", "http://abc.com"},
		{`http://"www.phaelos.com/oubre.html`, `http://www.phaelos.com/oubre.html`},
		{`http://!www.rkm=journal.de/archives/13383`, `http://www.rkm=journal.de/archives/13383`},
		{`http:///en.m.wikipedia.org/ChenLong`, `http://en.m.wikipedia.org/ChenLong`},
		{`http://10.1111/joim.12348`, `https://doi.org/10.1111/joim.12348`},
		{`http://10.1113/jphysiol.2002.026047`, `https://doi.org/10.1113/jphysiol.2002.026047`},
		{`http://10.30.3.16/moodle/course/view.php?id=25`, `http://10.30.3.16/moodle/course/view.php?id=25`},
		{`http://10.3266/RevEspEndocrinolPediatr.pre2015.Nov.330`, `https://doi.org/10.3266/RevEspEndocrinolPediatr.pre2015.Nov.330`},
		{`http://120.107.180.177/1832/9901/099-2-07p.pdf.Accessed`, `http://120.107.180.177/1832/9901/099-2-07p.pdf`},
		{`http://120cartas.ig.com.br/wp/maio-de-2008-um-aniversario-de-120-anos/.Acessoem:set`,
			`http://120cartas.ig.com.br/wp/maio-de-2008-um-aniversario-de-120-anos/`},
		{`http://122.53.86.125/NNS/8thNNS.pdf.Accessed`, `http://122.53.86.125/NNS/8thNNS.pdf`},
		{`http://122.53.86.125/facts_figures2011.pdf.Accessedon`,
			`http://122.53.86.125/facts_figures2011.pdf`},
		{`http://129.3.20.41/eps/fin/papers/0507/0507016.pdf.diaksespadatanggal23Januari`,
			`http://129.3.20.41/eps/fin/papers/0507/0507016.pdf`},
		{`http://129.3.20.41/eps/hew/papers/0512/0512001.pdfAccessed1`,
			`http://129.3.20.41/eps/hew/papers/0512/0512001.pdf`},
		{`http://140.120.197.173/Ecology/Download/Timing-MSChart.zipJournalofInsectScience`,
			`http://140.120.197.173/Ecology/Download/Timing-MSChart.zip`},
		{`141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`,
			`http://141.213.232.243/bitstream/handle/2027.42/86336/apterc_1.pdf?sequence=1`},
		{`http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdfRECOVER`,
			`http://141.232.10.32/pm/recover/recover_docs/perf_measures/062812_rec_pm_scs_salinity_flbay.pdf`},
		{`http://2010.census.gov/news/releases/operations/cb11-cn125.html.lastaccessed4`,
			`http://2010.census.gov/news/releases/operations/cb11-cn125.html`},
		{`http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.htmladresindenedinilmiştir`,
			`http://2014hit.blogspot.com.tr/2014/12/george-gerbnerin-tv-arastrmas-ve-ekme.html`},
		{`http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdfsayfasındanulaşıl-mıştır`,
			`http://2015.ses.org.tr/wp-ontent/uploads/toplumsalcinsiyetrolleri.pdf`},
		{`http://2015.veneziabiennale-japanpavilion.jp/en/Consultadael20deoctubrealas14`,
			`http://2015.veneziabiennale-japanpavilion.jp/en/`},
		{`http://-annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`,
			`http://annalsofneurosciences.org/journal/index.php/annal/article/view/43/67`},
		{`http://-www.gifted.uconn.edu/Siegle/Dissertations/Eric%20Mann.pdf.Diunduh15`,
			`http://www.gifted.uconn.edu/Siegle/Dissertations/Eric%20Mann.pdf`},
		{`http://-www.suparlan.com/pages/posts/.Diakses15Pebruari`,
			`http://www.suparlan.com/pages/posts/`},
		{`http://...books.google.com/books?isbn=0873552601`,
			`http://books.google.com/books?isbn=0873552601`},
		{`http://.R-project.org`,
			`http://R-project.org`},
		{`http://.amazona.com/academia.edu.documents//autogestion.pdfRecibido:24demayode2017`,
			`http://amazona.com/academia.edu.documents//autogestion.pdf`},
		{`http://10.1007/s00779-012-0615-1`,
			`https://doi.org/10.1007/s00779-012-0615-1`},
		{`http://20.132.48.254/PDFS/ED495503.pdf.Accessedat`,
			`http://20.132.48.254/PDFS/ED495503.pdf`},
		{`http://82.198.195.82/presse/mitteilungen/2007/Stellungnahme_dsn_BDAG_Internet_20071219.pdf,abgerufenam19`,
			`http://82.198.195.82/presse/mitteilungen/2007/Stellungnahme_dsn_BDAG_Internet_20071219.pdf`},
		{`http://CRAN.R-project.org/package=RTextTools.Zugegriffen:6Juni`,
			`http://CRAN.R-project.org/package=RTextTools`},
		{`http://189.28.128.99/provab/docs/geral/edital_28_02_2012_resultado_provab.pdf.Acessoem19/11/2014`,
			`http://189.28.128.99/provab/docs/geral/edital_28_02_2012_resultado_provab.pdf`},
		{`http://195.20.232.142/img/Schwerpunktnewsletter_Oesterreich_Bibliotheken.pdf.Stanzdnia13.04`,
			`http://195.20.232.142/img/Schwerpunktnewsletter_Oesterreich_Bibliotheken.pdf`},
		{`http://aalc07.psu.edu/papers/jn_typol_class3.pdf.Stanford`,
			`http://aalc07.psu.edu/papers/jn_typol_class3.pdf`},
		{`http://aboriginalhealth.flinders.edu.au/Newsletters/2010/Downloads/SHRP%20FINAL%20REPORT%20PART%20TWO%20July%202009.pdfAccessed14/12/2012`,
			`http://aboriginalhealth.flinders.edu.au/Newsletters/2010/Downloads/SHRP%20FINAL%20REPORT%20PART%20TWO%20July%202009.pdf`},
		{`http://about-air.ru/svojstva-vozduha/davlenie-vozduha/normalnoe-atmosfernoe-davlenie.html,доступ-свободный,датаобращения15.04.2017`,
			`http://about-air.ru/svojstva-vozduha/davlenie-vozduha/normalnoe-atmosfernoe-davlenie.html`},
		{`http://acl.ldc.upenn.edu/W/W98/W98-1120.pdfDateofaccess`,
			`http://acl.ldc.upenn.edu/W/W98/W98-1120.pdf`},
		{`http://acl.mit.edu/pa-pers/2012-uber-conference-submitted.pdf//49thIEEE`,
			`http://acl.mit.edu/pa-pers/2012-uber-conference-submitted.pdf`},
		{`http://acoss.org.au/policy/community_services/emergency_relief_handbook/,accessed1st`,
			`http://acoss.org.au/policy/community_services/emergency_relief_handbook/`},
		{`http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/.Ac-cessedon06/12/2016`,
			`http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/`},
		{`http://acta.uta.fi/pdf/951-44-4701-6.pdfRHEINDORF`,
			`http://acta.uta.fi/pdf/951-44-4701-6.pdf`},
		{`http://admi.net/jo/20080423/ECE-C0771649A.html.Pageconsultéele25septembre`,
			`http://admi.net/jo/20080423/ECE-C0771649A.html`},
		{`http://admin.localgov.co.uk/his_localgov/view/images/uploaded/Image/childrensblackpool.PDF.Lastaccess8`,
			`http://admin.localgov.co.uk/his_localgov/view/images/uploaded/Image/childrensblackpool.PDF`},
		{`http://aec.ifas.ufl.edu/abrams/step/critical_litreview.pdfİndirme`,
			`http://aec.ifas.ufl.edu/abrams/step/critical_litreview.pdf`},
		{`http://aem.asm.org/Downloadedfrom`, `http://aem.asm.org/`},
		{`http://aem.asm.org/content/67/6/2766.full.pdf+htmlWITTWER`,
			`http://aem.asm.org/content/67/6/2766.full.pdf+html`},
		{`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115https://doi.org/10.5747/ca.2010.v06.n1.a044`,
			`http://agris.fao.org/agris-search/search.do?recordID=BR2013800115`},
		{`http://ailab.ist.psu.edu/bcpred/SVMTriP:http://sysbio.unl.edu/SVMTriP/prediction.phpBcell`,
			`http://ailab.ist.psu.edu/bcpred/SVMTriP`},
		{`http://aim.bmj.com/content/31/1/23.full.pdf+htmlAcessoem:15Dez`,
			`http://aim.bmj.com/content/31/1/23.full.pdf+html`},
		{`http://ainfo.cnptia.embrapa.br/digital/bitstream/CNPAT-2010/8608/1/Ci-017.pdfAcessed06`,
			`http://ainfo.cnptia.embrapa.br/digital/bitstream/CNPAT-2010/8608/1/Ci-017.pdf`},
		{`12s`, ``},
		{`12spoaspdop`, ``},
		{`0.0.www.epcglobalinc.org/standards_technology/Secure/v1.0/UHF-class1.pdf`,
			`http://www.epcglobalinc.org/standards_technology/Secure/v1.0/UHF-class1.pdf`},
		{`CRAN.R-project.org/package=vegan`,
			`http://CRAN.R-project.org/package=vegan`},
		{`CRD42014009228.www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42014009228`,
			`http://www.crd.york.ac.uk/PROSPERO/display_record.asp?ID=CRD42014009228`},
		{`ftp://ftp.ncbi.nih.gov/genomes/Bacteria/`,
			`ftp://ftp.ncbi.nih.gov/genomes/Bacteria/`},
		{`ftp-eng.cisco.com/sobgp/index.html`,
			`http://ftp-eng.cisco.com/sobgp/index.html`},
		{`ftp.cdc.gov/pub/Publications/mmwr/SS/SS4703.pdf`,
			`http://ftp.cdc.gov/pub/Publications/mmwr/SS/SS4703.pdf`},
		{`ftpftp.inria.fr`,
			`http://ftpftp.inria.fr`},
		{`http.bglink.com/personal/batakovic`, `http://bglink.com/personal/batakovic`},
		{`http.kalsel.bps.go.id`, `http://kalsel.bps.go.id`},
		{`http.www.admhmao.ru/people/frame.htm`, `http://www.admhmao.ru/people/frame.htm`},
		{`http.worldbank.org/sq`, `http://worldbank.org/sq`},
		{`httpwww.sun.com`, `http://www.sun.com`},
		{`httpswww.unos.org`, `http://www.unos.org`},
		{`ics.uci.edu/pub/ietf/`, `http://ics.uci.edu/pub/ietf/`},
		{`ISSN-2177-4129periodicos.ufpel.edu.br/ojs2/index.php/Memoriahttp://dx.doi.org/10.15210/rmr.v8i14.7485`,
			`http://dx.doi.org/10.15210/rmr.v8i14.7485`},
		{`Shttp://hdl.handle.net/1765/1163`,
			`http://hdl.handle.net/1765/1163`},
		{`cdec.water.ca.gov/misc/DailyPrecip.html`,
			`http://cdec.water.ca.gov/misc/DailyPrecip.html`},
		{`https://www.ibge.gov.br/estatisticas/sociais/populacao/9103-estimativas-de-populacao.html?=&t=resultados.Accessed22`,
			`https://www.ibge.gov.br/estatisticas/sociais/populacao/9103-estimativas-de-populacao.html?=&t=resultados`},
		{`https://doi.org/10.1101/2020.06.23.167395doi:bioRxivpreprint`, // TODO: e.g. remove "doi:" or the like
			`https://doi.org/10.1101/2020.06.23.167395doi:bioRxivpreprint`},
		{`mail:claire.wyart@icm-institute.org,claire.wyart@inserm.frhttp://dx.doi.org/10.1016/j.cub.2015.01.006`,
			`http://dx.doi.org/10.1016/j.cub.2015.01.006`},
		{`http://www.nbcnews.com/technology/virtual-cockpit-what-it-takes-fly-drone-1C9319684.Acessoem:15/07/2013`,
			`http://www.nbcnews.com/technology/virtual-cockpit-what-it-takes-fly-drone-1C9319684`},
	}
	for _, c := range cases {
		out := SanitizeURL(c.in)
		if out != c.out {
			// %q makes empty strings and unusual characters visible; Errorf
			// (rather than Fatalf) lets the remaining rows still be checked.
			t.Errorf("SanitizeURL(%q) = %q, want %q", c.in, out, c.out)
		}
	}
}
// TestHasAnyPrefix checks HasAnyPrefix against a small table of strings and
// candidate prefix lists, including nil and empty lists. Each mismatch is
// reported with the inputs that produced it, and all rows are evaluated even
// if an earlier one fails.
func TestHasAnyPrefix(t *testing.T) {
	var cases = []struct {
		s      string   // string under test
		prefix []string // candidate prefixes passed to HasAnyPrefix
		result bool     // expected return value
	}{
		{s: "", prefix: nil, result: false},
		{s: "", prefix: []string{}, result: false},
		{s: "a", prefix: []string{}, result: false},
		{s: "a", prefix: []string{"a"}, result: true},
		{s: "a", prefix: []string{"aa"}, result: false},
		{s: "aa", prefix: []string{"a"}, result: true},
	}
	for _, c := range cases {
		result := HasAnyPrefix(c.s, c.prefix)
		if result != c.result {
			// Include the inputs so the failing row can be identified.
			t.Errorf("HasAnyPrefix(%q, %q) = %v, want %v", c.s, c.prefix, result, c.result)
		}
	}
}
// BenchmarkSanitizeURL times SanitizeURL on two fixed inputs, each run as its
// own sub-benchmark: "http" is a URL that already carries a scheme, "plain"
// is a schemeless string.
func BenchmarkSanitizeURL(b *testing.B) {
	type benchCase struct {
		name string // sub-benchmark name
		in   string // argument passed to SanitizeURL
	}
	inputs := []benchCase{
		{name: `http`, in: `http://acrf.com.au/2012/world-firsthpv-vaccina-tion-plan-will-protect-young-australian-men-from-cancer/`},
		{name: `plain`, in: `0.0.www.epcglobalinc.org/standards_technology/Secure/v1.0/UHF-class1.pdf`},
	}
	for idx := range inputs {
		tc := inputs[idx]
		b.Run(tc.name, func(sub *testing.B) {
			rawURL := tc.in
			for i := 0; i < sub.N; i++ {
				SanitizeURL(rawURL)
			}
		})
	}
}
|