From 09a7e8c9d013f13a1aa1ef4e9b7f397647b79967 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Sun, 21 Mar 2021 01:17:38 +0100 Subject: initial import of skate --- skate/set/set.go | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++ skate/set/set_test.go | 39 ++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 skate/set/set.go create mode 100644 skate/set/set_test.go (limited to 'skate/set') diff --git a/skate/set/set.go b/skate/set/set.go new file mode 100644 index 0000000..a198094 --- /dev/null +++ b/skate/set/set.go @@ -0,0 +1,163 @@ +package set + +import ( + "sort" + "strings" +) + +// Set implements basic string set operations. +type Set map[string]struct{} + +// Add adds an element. +func (s *Set) Add(v string) *Set { + (*s)[v] = struct{}{} + return s +} + +// Len returns number of elements in set. +func (s *Set) Len() int { + return len(*s) +} + +// IsEmpty returns if set has zero elements. +func (s *Set) IsEmpty() bool { + return s.Len() == 0 +} + +// Equals returns true, if sets contain the same elements. +func (s *Set) Equals(t *Set) bool { + for k := range *s { + if !t.Contains(k) { + return false + } + } + return s.Len() == t.Len() +} + +// Contains returns membership status. +func (s *Set) Contains(v string) bool { + _, ok := (*s)[v] + return ok +} + +// Intersection returns a new set containing all elements found in both sets. +func (s *Set) Intersection(t *Set) *Set { + u := New() + for _, v := range s.Slice() { + if t.Contains(v) { + u.Add(v) + } + } + return u +} + +// Union returns the union of two sets. +func (s *Set) Union(t *Set) *Set { + u := New() + for _, v := range s.Slice() { + u.Add(v) + } + for _, v := range t.Slice() { + u.Add(v) + } + return u +} + +// Slice returns all elements as a slice. +func (s *Set) Slice() (result []string) { + for k := range *s { + result = append(result, k) + } + return +} + +// SortedSlice returns all elements as a slice, sorted. +func (s *Set) SortedSlice() (result []string) { + for k := range *s { + result = append(result, k) + } + sort.Strings(result) + return +} + +// TopK returns at most k elements. +func (s *Set) TopK(k int) *Set { + var top []string + for i, v := range s.SortedSlice() { + if i < k { + top = append(top, v) + } + } + return FromSlice(top) +} + +func (s *Set) Product(t *Set) (result [][]string) { + for k := range *s { + for l := range *t { + result = append(result, []string{k, l}) + } + } + return +} + +// Jaccard returns the jaccard index of sets s and t. +func (s *Set) Jaccard(t *Set) float64 { + if s.IsEmpty() && t.IsEmpty() { + return 1 + } + if u := s.Union(t); u.IsEmpty() { + return 0 + } else { + return float64(s.Intersection(t).Len()) / float64(u.Len()) + } +} + +func (s *Set) Join(sep string) string { + return strings.Join(s.Slice(), sep) +} + +// Max returns the size of the largest set. +func Max(ss ...*Set) (max int) { + for _, s := range ss { + if s.Len() > max { + max = s.Len() + } + } + return +} + +// Min returns the size of the smallest set. +func Min(ss ...*Set) (min int) { + min = 2 << 30 + for _, s := range ss { + if s.Len() < min { + min = s.Len() + } + } + return +} + +func Filter(s *Set, f func(string) bool) *Set { + t := New() + for v := range *s { + if f(v) { + t.Add(v) + } + } + return t +} + +// New creates a new set. +func New() *Set { + s := make(Set) + return &s +} + +// FromSlice initializes a set from a slice. +func FromSlice(vs []string) *Set { + s := New() + for _, v := range vs { + s.Add(v) + } + return s +} diff --git a/skate/set/set_test.go b/skate/set/set_test.go new file mode 100644 index 0000000..c319759 --- /dev/null +++ b/skate/set/set_test.go @@ -0,0 +1,39 @@ +package set + +import ( + "testing" + + "github.com/matryer/is" +) + +func TestSet(t *testing.T) { + is := is.New(t) + + s := make(Set) + is.Equal(s.Len(), 0) + is.True(s.IsEmpty()) + + s.Add("1") + is.Equal(s.Len(), 1) + is.True(!s.IsEmpty()) + is.True(s.Contains("1")) + is.True(!s.Contains("2")) + is.Equal(s.Slice(), []string{"1"}) + + r := make(Set) + r.Add("2") + is.True(s.Intersection(&r).IsEmpty()) + is.Equal(s.Union(&r).Len(), 2) + is.Equal(s.Union(&r).SortedSlice(), []string{"1", "2"}) + + r.Add("3") + r.Add("4") + r.Add("5") + r.Add("6") + r.Add("7") + r.Add("8") + top := make(Set) + top.Add("2") + top.Add("3") + is.Equal(r.TopK(2), &top) +} -- cgit v1.2.3