about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-09 21:27:24 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-09 21:27:24 +0200
commit d6c7e0259676da1645b7d261058acd4613fac4dd (patch)
tree 5a20528965fd264a512a464b757ca2268bf4043d /skate
parent 6bc75d77b688fef72760485ad27a1baa6157bfe9 (diff)
download refcat-d6c7e0259676da1645b7d261058acd4613fac4dd.tar.gz
refcat-d6c7e0259676da1645b7d261058acd4613fac4dd.zip
wiki: more aggressive whitespace cleanup
Diffstat (limited to 'skate')
-rw-r--r-- skate/cmd/skate-wikipedia-doi/main.go 3
1 file changed, 2 insertions, 1 deletion
diff --git a/skate/cmd/skate-wikipedia-doi/main.go b/skate/cmd/skate-wikipedia-doi/main.go
index be3be0a..fe56576 100644
--- a/skate/cmd/skate-wikipedia-doi/main.go
+++ b/skate/cmd/skate-wikipedia-doi/main.go
@@ -18,6 +18,7 @@ var (
numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
batchSize = flag.Int("b", 100000, "batch size")
bytesNewline = []byte("\n")
+ wsReplacer = strings.NewReplacer("\t", "", "\n", "", " ", "")
)
func main() {
@@ -32,7 +33,7 @@ func main() {
return nil, nil
}
var (
- doi = strings.TrimSpace(ids.DOI)
+ doi = wsReplacer.Replace(ids.DOI)
pageTitle = strings.TrimSpace(w.PageTitle)
s = fmt.Sprintf("%s\t%s\t%s", doi, pageTitle, string(p))
)