diff options
Diffstat (limited to 'skate')
-rw-r--r-- | skate/schema.go | 2 |
-rw-r--r-- | skate/set/set.go | 94 |
-rw-r--r-- | skate/set/set_test.go | 8 |
-rw-r--r-- | skate/verify.go | 4 |
4 files changed, 56 insertions, 52 deletions
diff --git a/skate/schema.go b/skate/schema.go index 14397e9..a9570b7 100644 --- a/skate/schema.go +++ b/skate/schema.go @@ -80,7 +80,7 @@ func parseIsbn(s string) []string { u []rune z string ) - valid := setPool.Get().(*set.Set) + valid := setPool.Get().(set.Set) valid.Clear() defer setPool.Put(valid) for _, v := range append(candidates10, candidates13...) { diff --git a/skate/set/set.go b/skate/set/set.go index 6bad47e..29cd3ef 100644 --- a/skate/set/set.go +++ b/skate/set/set.go @@ -8,31 +8,47 @@ import ( // Set implements basic string set operations, not thread-safe. type Set map[string]struct{} -func (s *Set) Clear() { - for k := range *s { - delete(*s, k) +// New creates a new set. +func New() Set { + var s = make(Set) + return s +} + +// FromSlice initializes a set from a slice. +func FromSlice(vs []string) Set { + s := New() + for _, v := range vs { + s.Add(v) + } + return s +} + +// Clear removes all elements. +func (s Set) Clear() { + for k := range s { + delete(s, k) } } // Add adds an element. -func (s *Set) Add(v string) *Set { - (*s)[v] = struct{}{} +func (s Set) Add(v string) Set { + s[v] = struct{}{} return s } // Len returns number of elements in set. -func (s *Set) Len() int { - return len(*s) +func (s Set) Len() int { + return len(s) } // IsEmpty returns if set has zero elements. -func (s *Set) IsEmpty() bool { +func (s Set) IsEmpty() bool { return s.Len() == 0 } // Equals returns true, if sets contain the same elements. -func (s *Set) Equals(t *Set) bool { - for k := range *s { +func (s Set) Equals(t Set) bool { + for k := range s { if !t.Contains(k) { return false } @@ -41,13 +57,13 @@ func (s *Set) Equals(t *Set) bool { } // Contains returns membership status. -func (s *Set) Contains(v string) bool { - _, ok := (*s)[v] +func (s Set) Contains(v string) bool { + _, ok := (s)[v] return ok } // Intersection returns a new set containing all elements found in both sets. 
-func (s *Set) Intersection(t *Set) *Set { +func (s Set) Intersection(t Set) Set { u := New() for _, v := range s.Slice() { if t.Contains(v) { @@ -58,7 +74,7 @@ func (s *Set) Intersection(t *Set) *Set { } // Union returns the union of two sets. -func (s *Set) Union(t *Set) *Set { +func (s Set) Union(t Set) Set { u := New() for _, v := range s.Slice() { u.Add(v) @@ -70,16 +86,16 @@ func (s *Set) Union(t *Set) *Set { } // Slice returns all elements as a slice. -func (s *Set) Slice() (result []string) { - for k := range *s { +func (s Set) Slice() (result []string) { + for k := range s { result = append(result, k) } return } -// SortedSlice returns all elements as a slice, sorted. -func (s *Set) SortedSlice() (result []string) { - for k := range *s { +// Sorted returns all elements as a slice, sorted. +func (s Set) Sorted() (result []string) { + for k := range s { result = append(result, k) } sort.Strings(result) @@ -87,9 +103,9 @@ func (s *Set) SortedSlice() (result []string) { } // TopK returns at most k elements. -func (s *Set) TopK(k int) *Set { +func (s Set) TopK(k int) Set { var top []string - for i, v := range s.SortedSlice() { + for i, v := range s.Sorted() { if i < k { top = append(top, v) } @@ -97,9 +113,10 @@ func (s *Set) TopK(k int) *Set { return FromSlice(top) } -func (s *Set) Product(t *Set) (result [][]string) { - for k := range *s { - for l := range *t { +// Product returns a slice of pairs, representing the cartesian product. +func (s Set) Product(t Set) (result [][]string) { + for k := range s { + for l := range t { result = append(result, []string{k, l}) } } @@ -107,7 +124,7 @@ func (s *Set) Product(t *Set) (result [][]string) { } // Jaccard returns the jaccard index of sets s and t. 
-func (s *Set) Jaccard(t *Set) float64 { +func (s Set) Jaccard(t Set) float64 { if s.IsEmpty() && t.IsEmpty() { return 1 } @@ -118,12 +135,13 @@ func (s *Set) Jaccard(t *Set) float64 { } } -func (s *Set) Join(sep string) string { +// Join joins elements from a set with given separator. +func (s Set) Join(sep string) string { return strings.Join(s.Slice(), sep) } // Max returns the size of the largest set. -func Max(ss ...*Set) (max int) { +func Max(ss ...Set) (max int) { for _, s := range ss { if s.Len() > max { max = s.Len() @@ -133,7 +151,7 @@ func Max(ss ...*Set) (max int) { } // Min returns the size of the smallest set. -func Min(ss ...*Set) (min int) { +func Min(ss ...Set) (min int) { min = 2 << 30 for _, s := range ss { if s.Len() < min { @@ -143,27 +161,13 @@ func Min(ss ...*Set) (min int) { return } -func Filter(s *Set, f func(string) bool) *Set { +// Filter returns a set containing all elements, which satisfy a given predicate. +func Filter(s Set, f func(string) bool) Set { t := New() - for v := range *s { + for v := range s { if f(v) { t.Add(v) } } return t } - -// New creates a new set. -func New() *Set { - s := make(Set) - return &s -} - -// FromSlice initializes a set from a slice. 
-func FromSlice(vs []string) *Set { - s := New() - for _, v := range vs { - s.Add(v) - } - return s -} diff --git a/skate/set/set_test.go b/skate/set/set_test.go index 403b6df..dffb3e3 100644 --- a/skate/set/set_test.go +++ b/skate/set/set_test.go @@ -22,9 +22,9 @@ func TestSet(t *testing.T) { r := make(Set) r.Add("2") - is.True(s.Intersection(&r).IsEmpty()) - is.Equal(s.Union(&r).Len(), 2) - is.Equal(s.Union(&r).SortedSlice(), []string{"1", "2"}) + is.True(s.Intersection(r).IsEmpty()) + is.Equal(s.Union(r).Len(), 2) + is.Equal(s.Union(r).Sorted(), []string{"1", "2"}) r.Add("3") r.Add("4") @@ -35,7 +35,7 @@ func TestSet(t *testing.T) { top := make(Set) top.Add("2") top.Add("3") - is.Equal(r.TopK(2), &top) + is.Equal(r.TopK(2), top) r.Clear() is.Equal(r.Len(), 0) diff --git a/skate/verify.go b/skate/verify.go index 914f6a4..e6ab03e 100644 --- a/skate/verify.go +++ b/skate/verify.go @@ -505,7 +505,7 @@ func VerifyMinTitleLength(a, b *Release, minTitleLength int) MatchResult { return MatchResult{StatusStrong, ReasonVersionedDOI} } if len(a.Extra.DataCite.Relations) > 0 || len(b.Extra.DataCite.Relations) > 0 { - getRelatedDOI := func(rel *Release) *set.Set { + getRelatedDOI := func(rel *Release) set.Set { ss := set.New() for _, rel := range rel.Extra.DataCite.Relations { if strings.ToLower(rel.RelatedIdentifierType) != "doi" { @@ -737,7 +737,7 @@ func parsePageString(s string) *ParsedPages { // averageScore take a limited set of authors and calculates pairwise // similarity scores, then returns the average of the best scores; between 0 // and 1. -func averageScore(a, b *set.Set) float64 { +func averageScore(a, b set.Set) float64 { aTrimmed := a.TopK(5) bTrimmed := b.TopK(5) maxScores := make(map[string]float64) // For each a, keep the max. |