diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-09 21:27:24 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-09 21:27:24 +0200 |
commit | d6c7e0259676da1645b7d261058acd4613fac4dd (patch) | |
tree | 5a20528965fd264a512a464b757ca2268bf4043d | |
parent | 6bc75d77b688fef72760485ad27a1baa6157bfe9 (diff) | |
download | refcat-d6c7e0259676da1645b7d261058acd4613fac4dd.tar.gz refcat-d6c7e0259676da1645b7d261058acd4613fac4dd.zip |
wiki: more aggressive whitespace cleanup
-rw-r--r-- | skate/cmd/skate-wikipedia-doi/main.go | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/skate/cmd/skate-wikipedia-doi/main.go b/skate/cmd/skate-wikipedia-doi/main.go
index be3be0a..fe56576 100644
--- a/skate/cmd/skate-wikipedia-doi/main.go
+++ b/skate/cmd/skate-wikipedia-doi/main.go
@@ -18,6 +18,7 @@ var (
 	numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
 	batchSize = flag.Int("b", 100000, "batch size")
 	bytesNewline = []byte("\n")
+	wsReplacer = strings.NewReplacer("\t", "", "\n", "", " ", "")
 )
 
 func main() {
@@ -32,7 +33,7 @@ func main() {
 				return nil, nil
 			}
 			var (
-				doi = strings.TrimSpace(ids.DOI)
+				doi = wsReplacer.Replace(ids.DOI)
 				pageTitle = strings.TrimSpace(w.PageTitle)
 				s = fmt.Sprintf("%s\t%s\t%s", doi, pageTitle, string(p))
 			)
```