aboutsummaryrefslogtreecommitdiffstats
path: root/skate/set
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-03-21 01:17:38 +0100
committerMartin Czygan <martin.czygan@gmail.com>2021-03-21 01:17:38 +0100
commit09a7e8c9d013f13a1aa1ef4e9b7f397647b79967 (patch)
tree122b474e27afbc66cba1182e983ef5c8555ed12f /skate/set
parenta7e0cf191ebf8fb499e0ab9a3b6cae45727f1286 (diff)
downloadrefcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.tar.gz
refcat-09a7e8c9d013f13a1aa1ef4e9b7f397647b79967.zip
initial import of skate
Diffstat (limited to 'skate/set')
-rw-r--r--skate/set/set.go163
-rw-r--r--skate/set/set_test.go39
2 files changed, 202 insertions, 0 deletions
diff --git a/skate/set/set.go b/skate/set/set.go
new file mode 100644
index 0000000..a198094
--- /dev/null
+++ b/skate/set/set.go
@@ -0,0 +1,163 @@
+package set
+
+import (
+ "sort"
+ "strings"
+)
+
+// Set implements basic string set operations.
+type Set map[string]struct{}
+
+// Add adds an element.
+func (s *Set) Add(v string) *Set {
+ (*s)[v] = struct{}{}
+ return s
+}
+
+// Len returns number of elements in set.
+func (s *Set) Len() int {
+ return len(*s)
+}
+
+// IsEmpty returns if set has zero elements.
+func (s *Set) IsEmpty() bool {
+ return s.Len() == 0
+}
+
+// Equals returns true, if sets contain the same elements.
+func (s *Set) Equals(t *Set) bool {
+ for k := range *s {
+ if !t.Contains(k) {
+ return false
+ }
+ }
+ return s.Len() == t.Len()
+}
+
+// Contains returns membership status.
+func (s *Set) Contains(v string) bool {
+ _, ok := (*s)[v]
+ return ok
+}
+
+// Intersection returns a new set containing all elements found in both sets.
+func (s *Set) Intersection(t *Set) *Set {
+ u := New()
+ for _, v := range s.Slice() {
+ if t.Contains(v) {
+ u.Add(v)
+ }
+ }
+ return u
+}
+
+// Union returns the union of two sets.
+func (s *Set) Union(t *Set) *Set {
+ u := New()
+ for _, v := range s.Slice() {
+ u.Add(v)
+ }
+ for _, v := range t.Slice() {
+ u.Add(v)
+ }
+ return u
+}
+
+// Slice returns all elements as a slice.
+func (s *Set) Slice() (result []string) {
+ for k := range *s {
+ result = append(result, k)
+ }
+ return
+}
+
+// SortedSlice returns all elements as a slice, sorted.
+func (s *Set) SortedSlice() (result []string) {
+ for k := range *s {
+ result = append(result, k)
+ }
+ sort.Strings(result)
+ return
+}
+
+// TopK returns at most k elements.
+func (s *Set) TopK(k int) *Set {
+ var top []string
+ for i, v := range s.SortedSlice() {
+ if i < k {
+ top = append(top, v)
+ }
+ }
+ return FromSlice(top)
+}
+
+func (s *Set) Product(t *Set) (result [][]string) {
+ for k := range *s {
+ for l := range *t {
+ result = append(result, []string{k, l})
+ }
+ }
+ return
+}
+
+// Jaccard returns the jaccard index of sets s and t.
+func (s *Set) Jaccard(t *Set) float64 {
+ if s.IsEmpty() && t.IsEmpty() {
+ return 1
+ }
+ if u := s.Union(t); u.IsEmpty() {
+ return 0
+ } else {
+ return float64(s.Intersection(t).Len()) / float64(u.Len())
+ }
+}
+
+func (s *Set) Join(sep string) string {
+ return strings.Join(s.Slice(), sep)
+}
+
+// Max returns the size of the largest set.
+func Max(ss ...*Set) (max int) {
+ for _, s := range ss {
+ if s.Len() > max {
+ max = s.Len()
+ }
+ }
+ return
+}
+
+// Min returns the size of the smallest set.
+func Min(ss ...*Set) (min int) {
+ min = 2 << 30
+ for _, s := range ss {
+ if s.Len() < min {
+ min = s.Len()
+ }
+ }
+ return
+}
+
+func Filter(s *Set, f func(string) bool) *Set {
+ t := New()
+ for v := range *s {
+ if f(v) {
+ t.Add(v)
+ }
+ }
+ return t
+}
+
+// New creates a new set.
+func New() *Set {
+ s := make(Set)
+ return &s
+}
+
+// FromSlice initializes a set from a slice.
+func FromSlice(vs []string) *Set {
+ s := New()
+ for _, v := range vs {
+ s.Add(v)
+ }
+ return s
+}
diff --git a/skate/set/set_test.go b/skate/set/set_test.go
new file mode 100644
index 0000000..c319759
--- /dev/null
+++ b/skate/set/set_test.go
@@ -0,0 +1,39 @@
+package set
+
+import (
+ "testing"
+
+ "github.com/matryer/is"
+)
+
+func TestSet(t *testing.T) {
+ is := is.New(t)
+
+ s := make(Set)
+ is.Equal(s.Len(), 0)
+ is.True(s.IsEmpty())
+
+ s.Add("1")
+ is.Equal(s.Len(), 1)
+ is.True(!s.IsEmpty())
+ is.True(s.Contains("1"))
+ is.True(!s.Contains("2"))
+ is.Equal(s.Slice(), []string{"1"})
+
+ r := make(Set)
+ r.Add("2")
+ is.True(s.Intersection(&r).IsEmpty())
+ is.Equal(s.Union(&r).Len(), 2)
+ is.Equal(s.Union(&r).SortedSlice(), []string{"1", "2"})
+
+ r.Add("3")
+ r.Add("4")
+ r.Add("5")
+ r.Add("6")
+ r.Add("7")
+ r.Add("8")
+ top := make(Set)
+ top.Add("2")
+ top.Add("3")
+ is.Equal(r.TopK(2), &top)
+}