aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r--skate/schema.go2
-rw-r--r--skate/set/set.go94
-rw-r--r--skate/set/set_test.go8
-rw-r--r--skate/verify.go4
4 files changed, 56 insertions, 52 deletions
diff --git a/skate/schema.go b/skate/schema.go
index 14397e9..a9570b7 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -80,7 +80,7 @@ func parseIsbn(s string) []string {
u []rune
z string
)
- valid := setPool.Get().(*set.Set)
+ valid := setPool.Get().(set.Set)
valid.Clear()
defer setPool.Put(valid)
for _, v := range append(candidates10, candidates13...) {
diff --git a/skate/set/set.go b/skate/set/set.go
index 6bad47e..29cd3ef 100644
--- a/skate/set/set.go
+++ b/skate/set/set.go
@@ -8,31 +8,47 @@ import (
// Set implements basic string set operations, not thread-safe.
type Set map[string]struct{}
-func (s *Set) Clear() {
- for k := range *s {
- delete(*s, k)
+// New creates a new set.
+func New() Set {
+ var s = make(Set)
+ return s
+}
+
+// FromSlice initializes a set from a slice.
+func FromSlice(vs []string) Set {
+ s := New()
+ for _, v := range vs {
+ s.Add(v)
+ }
+ return s
+}
+
+// Clear removes all elements.
+func (s Set) Clear() {
+ for k := range s {
+ delete(s, k)
}
}
// Add adds an element.
-func (s *Set) Add(v string) *Set {
- (*s)[v] = struct{}{}
+func (s Set) Add(v string) Set {
+ s[v] = struct{}{}
return s
}
// Len returns number of elements in set.
-func (s *Set) Len() int {
- return len(*s)
+func (s Set) Len() int {
+ return len(s)
}
// IsEmpty returns if set has zero elements.
-func (s *Set) IsEmpty() bool {
+func (s Set) IsEmpty() bool {
return s.Len() == 0
}
// Equals returns true, if sets contain the same elements.
-func (s *Set) Equals(t *Set) bool {
- for k := range *s {
+func (s Set) Equals(t Set) bool {
+ for k := range s {
if !t.Contains(k) {
return false
}
@@ -41,13 +57,13 @@ func (s *Set) Equals(t *Set) bool {
}
// Contains returns membership status.
-func (s *Set) Contains(v string) bool {
- _, ok := (*s)[v]
+func (s Set) Contains(v string) bool {
+ _, ok := (s)[v]
return ok
}
// Intersection returns a new set containing all elements found in both sets.
-func (s *Set) Intersection(t *Set) *Set {
+func (s Set) Intersection(t Set) Set {
u := New()
for _, v := range s.Slice() {
if t.Contains(v) {
@@ -58,7 +74,7 @@ func (s *Set) Intersection(t *Set) *Set {
}
// Union returns the union of two sets.
-func (s *Set) Union(t *Set) *Set {
+func (s Set) Union(t Set) Set {
u := New()
for _, v := range s.Slice() {
u.Add(v)
@@ -70,16 +86,16 @@ func (s *Set) Union(t *Set) *Set {
}
// Slice returns all elements as a slice.
-func (s *Set) Slice() (result []string) {
- for k := range *s {
+func (s Set) Slice() (result []string) {
+ for k := range s {
result = append(result, k)
}
return
}
-// SortedSlice returns all elements as a slice, sorted.
-func (s *Set) SortedSlice() (result []string) {
- for k := range *s {
+// Sorted returns all elements as a slice, sorted.
+func (s Set) Sorted() (result []string) {
+ for k := range s {
result = append(result, k)
}
sort.Strings(result)
@@ -87,9 +103,9 @@ func (s *Set) SortedSlice() (result []string) {
}
// TopK returns at most k elements.
-func (s *Set) TopK(k int) *Set {
+func (s Set) TopK(k int) Set {
var top []string
- for i, v := range s.SortedSlice() {
+ for i, v := range s.Sorted() {
if i < k {
top = append(top, v)
}
@@ -97,9 +113,10 @@ func (s *Set) TopK(k int) *Set {
return FromSlice(top)
}
-func (s *Set) Product(t *Set) (result [][]string) {
- for k := range *s {
- for l := range *t {
+// Product returns a slice of pairs, representing the cartesian product.
+func (s Set) Product(t Set) (result [][]string) {
+ for k := range s {
+ for l := range t {
result = append(result, []string{k, l})
}
}
@@ -107,7 +124,7 @@ func (s *Set) Product(t *Set) (result [][]string) {
}
// Jaccard returns the jaccard index of sets s and t.
-func (s *Set) Jaccard(t *Set) float64 {
+func (s Set) Jaccard(t Set) float64 {
if s.IsEmpty() && t.IsEmpty() {
return 1
}
@@ -118,12 +135,13 @@ func (s *Set) Jaccard(t *Set) float64 {
}
}
-func (s *Set) Join(sep string) string {
+// Join joins elements from a set with given separator.
+func (s Set) Join(sep string) string {
return strings.Join(s.Slice(), sep)
}
// Max returns the size of the largest set.
-func Max(ss ...*Set) (max int) {
+func Max(ss ...Set) (max int) {
for _, s := range ss {
if s.Len() > max {
max = s.Len()
@@ -133,7 +151,7 @@ func Max(ss ...*Set) (max int) {
}
// Min returns the size of the smallest set.
-func Min(ss ...*Set) (min int) {
+func Min(ss ...Set) (min int) {
min = 2 << 30
for _, s := range ss {
if s.Len() < min {
@@ -143,27 +161,13 @@ func Min(ss ...*Set) (min int) {
return
}
-func Filter(s *Set, f func(string) bool) *Set {
+// Filter returns a set containing all elements, which satisfy a given predicate.
+func Filter(s Set, f func(string) bool) Set {
t := New()
- for v := range *s {
+ for v := range s {
if f(v) {
t.Add(v)
}
}
return t
}
-
-// New creates a new set.
-func New() *Set {
- s := make(Set)
- return &s
-}
-
-// FromSlice initializes a set from a slice.
-func FromSlice(vs []string) *Set {
- s := New()
- for _, v := range vs {
- s.Add(v)
- }
- return s
-}
diff --git a/skate/set/set_test.go b/skate/set/set_test.go
index 403b6df..dffb3e3 100644
--- a/skate/set/set_test.go
+++ b/skate/set/set_test.go
@@ -22,9 +22,9 @@ func TestSet(t *testing.T) {
r := make(Set)
r.Add("2")
- is.True(s.Intersection(&r).IsEmpty())
- is.Equal(s.Union(&r).Len(), 2)
- is.Equal(s.Union(&r).SortedSlice(), []string{"1", "2"})
+ is.True(s.Intersection(r).IsEmpty())
+ is.Equal(s.Union(r).Len(), 2)
+ is.Equal(s.Union(r).Sorted(), []string{"1", "2"})
r.Add("3")
r.Add("4")
@@ -35,7 +35,7 @@ func TestSet(t *testing.T) {
top := make(Set)
top.Add("2")
top.Add("3")
- is.Equal(r.TopK(2), &top)
+ is.Equal(r.TopK(2), top)
r.Clear()
is.Equal(r.Len(), 0)
diff --git a/skate/verify.go b/skate/verify.go
index 914f6a4..e6ab03e 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -505,7 +505,7 @@ func VerifyMinTitleLength(a, b *Release, minTitleLength int) MatchResult {
return MatchResult{StatusStrong, ReasonVersionedDOI}
}
if len(a.Extra.DataCite.Relations) > 0 || len(b.Extra.DataCite.Relations) > 0 {
- getRelatedDOI := func(rel *Release) *set.Set {
+ getRelatedDOI := func(rel *Release) set.Set {
ss := set.New()
for _, rel := range rel.Extra.DataCite.Relations {
if strings.ToLower(rel.RelatedIdentifierType) != "doi" {
@@ -737,7 +737,7 @@ func parsePageString(s string) *ParsedPages {
// averageScore take a limited set of authors and calculates pairwise
// similarity scores, then returns the average of the best scores; between 0
// and 1.
-func averageScore(a, b *set.Set) float64 {
+func averageScore(a, b set.Set) float64 {
aTrimmed := a.TopK(5)
bTrimmed := b.TopK(5)
maxScores := make(map[string]float64) // For each a, keep the max.