From a23c2cee4fe5bccabbf30392d670a380ef80b82b Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Mon, 26 Jul 2021 21:11:14 +0200 Subject: switch to slightly more performant string builder --- skate/schema.go | 64 ++++++++++++++++++++++++++++---------------------------- skate/slugify.go | 7 +++---- skate/verify.go | 13 ++++++------ 3 files changed, 41 insertions(+), 43 deletions(-) (limited to 'skate') diff --git a/skate/schema.go b/skate/schema.go index d6b4ded..b69f206 100644 --- a/skate/schema.go +++ b/skate/schema.go @@ -128,75 +128,75 @@ func RefToRelease(ref *Ref) (*Release, error) { // try to be readable. func ReleaseToUnstructured(r *Release) string { var ( - buf bytes.Buffer + sb strings.Builder names = make([]string, len(r.Contribs)) ) for i := 0; i < len(r.Contribs); i++ { names[i] = r.Contribs[i].RawName } - fmt.Fprintf(&buf, "%s", strings.Join(names, ", ")) + fmt.Fprintf(&sb, "%s", strings.Join(names, ", ")) if r.Title != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ". ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ". ") } - fmt.Fprintf(&buf, `%s`, r.Title) + fmt.Fprintf(&sb, `%s`, r.Title) } if len(r.Subtitle()) > 0 { - fmt.Fprintf(&buf, ": %s", strings.Join(r.Subtitle(), " ")) + fmt.Fprintf(&sb, ": %s", strings.Join(r.Subtitle(), " ")) } if r.ContainerName != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ". ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ". ") } - fmt.Fprintf(&buf, `%s`, r.ContainerName) + fmt.Fprintf(&sb, `%s`, r.ContainerName) } if r.Volume != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `vol. %s`, r.Volume) + fmt.Fprintf(&sb, `vol. %s`, r.Volume) } if r.Issue != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `no. %s`, r.Issue) + fmt.Fprintf(&sb, `no. 
%s`, r.Issue) } if r.ReleaseYear() > 0 { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `%s`, r.ReleaseYearString()) + fmt.Fprintf(&sb, `%s`, r.ReleaseYearString()) } if r.Pages != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `pp. %s`, r.Pages) + fmt.Fprintf(&sb, `pp. %s`, r.Pages) } if r.Publisher != "" { - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `%s`, r.Publisher) + fmt.Fprintf(&sb, `%s`, r.Publisher) } switch { case r.ExtIDs.DOI != "": - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } - fmt.Fprintf(&buf, `%s`, r.ExtIDs.DOI) + fmt.Fprintf(&sb, `%s`, r.ExtIDs.DOI) case len(r.ExtIDs.ISBN) > 0: - if buf.Len() > 0 { - fmt.Fprintf(&buf, ", ") + if sb.Len() > 0 { + fmt.Fprintf(&sb, ", ") } if isbn13, err := isbn.To13(r.ExtIDs.ISBN[0]); err == nil { - fmt.Fprintf(&buf, `%s`, isbn13) + fmt.Fprintf(&sb, `%s`, isbn13) } else { - fmt.Fprintf(&buf, `%s`, r.ExtIDs.ISBN[0]) + fmt.Fprintf(&sb, `%s`, r.ExtIDs.ISBN[0]) } } - return buf.String() + return sb.String() } // ParseIsbn tries to find and validate ISBN from unstructured data. Returns a diff --git a/skate/slugify.go b/skate/slugify.go index e8fa8bf..5cdc838 100644 --- a/skate/slugify.go +++ b/skate/slugify.go @@ -1,18 +1,17 @@ package skate import ( - "bytes" "fmt" "strings" ) // slugifyString is a basic string slugifier. 
func slugifyString(s string) string { - var buf bytes.Buffer + var sb strings.Builder for _, c := range strings.TrimSpace(strings.ToLower(s)) { if (c > 96 && c < 123) || (c > 47 && c < 58) || (c == 32) || (c == 9) || (c == 10) { - fmt.Fprintf(&buf, "%c", c) + fmt.Fprintf(&sb, "%c", c) } } - return strings.Join(strings.Fields(buf.String()), " ") + return strings.Join(strings.Fields(sb.String()), " ") } diff --git a/skate/verify.go b/skate/verify.go index 22f0a0d..5df0a7e 100644 --- a/skate/verify.go +++ b/skate/verify.go @@ -8,7 +8,6 @@ package skate import ( - "bytes" "fmt" "regexp" "strconv" @@ -501,17 +500,17 @@ func authorSimilarityScore(s, t string) float64 { // tokenNgrams are groups of n char-tokens per word-token in string, e.g. for // n=2 and string "Anne K Lam", we would get ["an", "ne", "k", "la", "m"]. func tokenNgrams(s string, n int) (result []string) { - var buf bytes.Buffer + var sb strings.Builder for _, token := range tokenizeString(s) { - buf.Reset() + sb.Reset() for i, c := range token { if i > 0 && i%n == 0 { - result = append(result, buf.String()) - buf.Reset() + result = append(result, sb.String()) + sb.Reset() } - buf.WriteRune(c) // XXX: skipping error handling + sb.WriteRune(c) // XXX: skipping error handling } - result = append(result, buf.String()) + result = append(result, sb.String()) } return } -- cgit v1.2.3