wip: a few dot examples

author: Martin Czygan <martin.czygan@gmail.com> 2021-04-21 17:49:12 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-04-21 17:49:12 +0200
commit: 3cb5513cb405af78a01750a29a93be28ac5d90e4 (patch)
tree: 46774566984606a113a17803da81f3b14ada742f /skate/wordwrap
parent: ae9e380225be648ced23d814cd1d08d1621976bd (diff)
download: refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.tar.gz
refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.zip
2 files changed, 182 insertions, 0 deletions
diff --git a/skate/wordwrap/wordwrap.go b/skate/wordwrap/wordwrap.go
new file mode 100644
index 0000000..f7bedda
--- /dev/null
+++ b/skate/wordwrap/wordwrap.go
@@ -0,0 +1,83 @@
+package wordwrap
+
+import (
+	"bytes"
+	"unicode"
+)
+
+const nbsp = 0xA0 // U+00A0 NO-BREAK SPACE; excluded from the white-space that may be wrapped on
+
+// WrapString wraps s, trying to keep each line within lim characters (runes).
+//
+// Wrapping is currently naive and only happens at white-space. A future
+// version of the library will implement smarter wrapping. This means that
+// pathological cases can dramatically reach past the limit, such as a very
+// long word.
+func WrapString(s string, lim uint) string {
+	// Pre-allocate the output buffer with capacity for len(s) bytes.
+	init := make([]byte, 0, len(s))
+	buf := bytes.NewBuffer(init)
+
+	var current uint // width in runes already flushed to the current output line
+	var wordBuf, spaceBuf bytes.Buffer // pending word and the white-space run queued before it
+	var wordBufLen, spaceBufLen uint // rune counts of wordBuf and spaceBuf
+
+	for _, char := range s {
+		if char == '\n' {
+			if wordBuf.Len() == 0 { // only white-space is pending
+				if current+spaceBufLen > lim { // trailing white-space does not fit: drop it
+					current = 0
+				} else {
+					current += spaceBufLen
+					spaceBuf.WriteTo(buf)
+				}
+				spaceBuf.Reset()
+				spaceBufLen = 0
+			} else { // flush the pending white-space and word before the break
+				current += spaceBufLen + wordBufLen
+				spaceBuf.WriteTo(buf)
+				spaceBuf.Reset()
+				spaceBufLen = 0
+				wordBuf.WriteTo(buf)
+				wordBuf.Reset()
+				wordBufLen = 0
+			}
+			buf.WriteRune(char) // explicit line breaks are always emitted
+			current = 0
+		} else if unicode.IsSpace(char) && char != nbsp {
+			if spaceBuf.Len() == 0 || wordBuf.Len() > 0 { // flush, unless we are extending a white-space run
+				current += spaceBufLen + wordBufLen
+				spaceBuf.WriteTo(buf)
+				spaceBuf.Reset()
+				spaceBufLen = 0
+				wordBuf.WriteTo(buf)
+				wordBuf.Reset()
+				wordBufLen = 0
+			}
+
+			spaceBuf.WriteRune(char)
+			spaceBufLen++
+		} else { // word character (this includes the no-break space)
+			wordBuf.WriteRune(char)
+			wordBufLen++
+
+			if current+wordBufLen+spaceBufLen > lim && wordBufLen < lim { // a word of lim or more runes never triggers a break
+				buf.WriteRune('\n') // start a new line; the pending word is written after it
+				current = 0
+				spaceBuf.Reset() // the white-space at the break point is dropped
+				spaceBufLen = 0
+			}
+		}
+	}
+
+	if wordBuf.Len() == 0 { // input ended in white-space: keep it only if it fits
+		if current+spaceBufLen <= lim {
+			spaceBuf.WriteTo(buf)
+		}
+	} else { // flush the final word together with the white-space before it
+		spaceBuf.WriteTo(buf)
+		wordBuf.WriteTo(buf)
+	}
+
+	return buf.String()
+}
diff --git a/skate/wordwrap/wordwrap_test.go b/skate/wordwrap/wordwrap_test.go
new file mode 100644
index 0000000..98010eb
--- /dev/null
+++ b/skate/wordwrap/wordwrap_test.go
@@ -0,0 +1,99 @@
+package wordwrap
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestWrapString(t *testing.T) {
+	cases := []struct { // each case: input, expected output, and the width limit passed to WrapString
+		Input, Output string
+		Lim           uint
+	}{
+		// A simple word passes through.
+		{
+			"foo",
+			"foo",
+			4,
+		},
+		// A single word that is too long passes through.
+		// We do not break words.
+		{
+			"foobarbaz",
+			"foobarbaz",
+			4,
+		},
+		// Lines are broken at whitespace.
+		{
+			"foo bar baz",
+			"foo\nbar\nbaz",
+			4,
+		},
+		// Lines are broken at whitespace, even if words
+		// are too long. We do not break words.
+		{
+			"foo bars bazzes",
+			"foo\nbars\nbazzes",
+			4,
+		},
+		// A word that would run beyond the width is wrapped.
+		{
+			"fo sop",
+			"fo\nsop",
+			4,
+		},
+		// Do not break on non-breaking space.
+		{
+			"foo bar\u00A0baz",
+			"foo\nbar\u00A0baz",
+			10,
+		},
+		// Whitespace that trails a line and fits the width
+		// passes through, as does whitespace prefixing an
+		// explicit line break. A tab counts as one character.
+		{
+			"foo\nb\t r\n baz",
+			"foo\nb\t r\n baz",
+			4,
+		},
+		// Trailing whitespace is removed if it doesn't fit the width.
+		// Runs of whitespace on which a line is broken are removed.
+		{
+			"foo    \nb   ar   ",
+			"foo\nb\nar",
+			4,
+		},
+		// An explicit line break at the end of the input is preserved.
+		{
+			"foo bar baz\n",
+			"foo\nbar\nbaz\n",
+			4,
+		},
+		// Explicit breaks are always preserved.
+		{
+			"\nfoo bar\n\n\nbaz\n",
+			"\nfoo\nbar\n\n\nbaz\n",
+			4,
+		},
+		// Complete example:
+		{
+			" This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz  \nBAM    ",
+			" This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* baz\nBAM",
+			6,
+		},
+		// Multi-byte characters each count as a single character of width.
+		{
+			strings.Repeat("\u2584 ", 4),
+			"\u2584 \u2584" + "\n" +
+				strings.Repeat("\u2584 ", 2),
+			4,
+		},
+	}
+
+	for i, tc := range cases { // t.Fatalf stops the test at the first mismatching case
+		actual := WrapString(tc.Input, tc.Lim)
+		if actual != tc.Output {
+			t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`", i, tc.Input, tc.Output, actual)
+		}
+	}
+}
author	Martin Czygan <martin.czygan@gmail.com>	2021-04-21 17:49:12 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-04-21 17:49:12 +0200
commit	3cb5513cb405af78a01750a29a93be28ac5d90e4 (patch)
tree	46774566984606a113a17803da81f3b14ada742f /skate/wordwrap
parent	ae9e380225be648ced23d814cd1d08d1621976bd (diff)
download	refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.tar.gz refcat-3cb5513cb405af78a01750a29a93be28ac5d90e4.zip