aboutsummaryrefslogtreecommitdiffstats
path: root/skate/set/set.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/set/set.go')
-rw-r--r--skate/set/set.go163
1 files changed, 163 insertions, 0 deletions
diff --git a/skate/set/set.go b/skate/set/set.go
new file mode 100644
index 0000000..a198094
--- /dev/null
+++ b/skate/set/set.go
@@ -0,0 +1,163 @@
+package set
+
+import (
+ "sort"
+ "strings"
+)
+
+// Set implements basic string set operations.
+type Set map[string]struct{}
+
+// Add adds an element.
+func (s *Set) Add(v string) *Set {
+ (*s)[v] = struct{}{}
+ return s
+}
+
+// Len returns number of elements in set.
+func (s *Set) Len() int {
+ return len(*s)
+}
+
+// IsEmpty returns if set has zero elements.
+func (s *Set) IsEmpty() bool {
+ return s.Len() == 0
+}
+
+// Equals returns true, if sets contain the same elements.
+func (s *Set) Equals(t *Set) bool {
+ for k := range *s {
+ if !t.Contains(k) {
+ return false
+ }
+ }
+ return s.Len() == t.Len()
+}
+
+// Contains returns membership status.
+func (s *Set) Contains(v string) bool {
+ _, ok := (*s)[v]
+ return ok
+}
+
+// Intersection returns a new set containing all elements found in both sets.
+func (s *Set) Intersection(t *Set) *Set {
+ u := New()
+ for _, v := range s.Slice() {
+ if t.Contains(v) {
+ u.Add(v)
+ }
+ }
+ return u
+}
+
+// Union returns the union of two sets.
+func (s *Set) Union(t *Set) *Set {
+ u := New()
+ for _, v := range s.Slice() {
+ u.Add(v)
+ }
+ for _, v := range t.Slice() {
+ u.Add(v)
+ }
+ return u
+}
+
+// Slice returns all elements as a slice.
+func (s *Set) Slice() (result []string) {
+ for k := range *s {
+ result = append(result, k)
+ }
+ return
+}
+
+// SortedSlice returns all elements as a slice, sorted.
+func (s *Set) SortedSlice() (result []string) {
+ for k := range *s {
+ result = append(result, k)
+ }
+ sort.Strings(result)
+ return
+}
+
+// TopK returns at most k elements.
+func (s *Set) TopK(k int) *Set {
+ var top []string
+ for i, v := range s.SortedSlice() {
+ if i < k {
+ top = append(top, v)
+ }
+ }
+ return FromSlice(top)
+}
+
+func (s *Set) Product(t *Set) (result [][]string) {
+ for k := range *s {
+ for l := range *t {
+ result = append(result, []string{k, l})
+ }
+ }
+ return
+}
+
+// Jaccard returns the jaccard index of sets s and t.
+func (s *Set) Jaccard(t *Set) float64 {
+ if s.IsEmpty() && t.IsEmpty() {
+ return 1
+ }
+ if u := s.Union(t); u.IsEmpty() {
+ return 0
+ } else {
+ return float64(s.Intersection(t).Len()) / float64(u.Len())
+ }
+}
+
+func (s *Set) Join(sep string) string {
+ return strings.Join(s.Slice(), sep)
+}
+
+// Max returns the size of the largest set.
+func Max(ss ...*Set) (max int) {
+ for _, s := range ss {
+ if s.Len() > max {
+ max = s.Len()
+ }
+ }
+ return
+}
+
+// Min returns the size of the smallest set.
+func Min(ss ...*Set) (min int) {
+ min = 2 << 30
+ for _, s := range ss {
+ if s.Len() < min {
+ min = s.Len()
+ }
+ }
+ return
+}
+
+func Filter(s *Set, f func(string) bool) *Set {
+ t := New()
+ for v := range *s {
+ if f(v) {
+ t.Add(v)
+ }
+ }
+ return t
+}
+
+// New creates a new set.
+func New() *Set {
+ s := make(Set)
+ return &s
+}
+
+// FromSlice initializes a set from a slice.
+func FromSlice(vs []string) *Set {
+ s := New()
+ for _, v := range vs {
+ s.Add(v)
+ }
+ return s
+}