aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/cleanup.go7
-rw-r--r--skate/reduce.go15
-rw-r--r--skate/verify.go7
-rw-r--r--skate/xio/util.go4
4 files changed, 28 insertions, 5 deletions
diff --git a/skate/cleanup.go b/skate/cleanup.go
index c8ce6d1..4a10063 100644
--- a/skate/cleanup.go
+++ b/skate/cleanup.go
@@ -9,6 +9,8 @@ import (
"mvdan.cc/xurls/v2"
)
+// TODO: This should be revisited entirely.
+
var rxRelaxed = xurls.Relaxed()
// FilterURL is a line-oriented URL filter.
@@ -21,6 +23,7 @@ type FilterURL struct {
AllowedSchemas []string
}
+// Run executes the filter on a blob of data, most likely a line.
func (f *FilterURL) Run(p []byte) ([]byte, error) {
parts := strings.Split(string(p), f.Delimiter)
if len(parts) < f.Index {
@@ -59,6 +62,7 @@ type FilterDOI struct {
AllowedSchema []string
}
+// Run executes the filter on a blob of data, most likely a line.
func (f *FilterDOI) Run(p []byte) ([]byte, error) {
parts := strings.Split(string(p), f.Delimiter)
if len(parts) < f.Index {
@@ -78,8 +82,11 @@ func (f *FilterDOI) Run(p []byte) ([]byte, error) {
return []byte(strings.Join(parts, f.Delimiter)), nil
}
+// FilterRawRef is an ad-hoc filter.
type FilterRawRef struct{}
+// Run executes the filter. TODO: Gather cleanup functions together and make
+// them more easily shared.
func (f *FilterRawRef) Run(p []byte) ([]byte, error) {
var ref Ref
if err := json.Unmarshal(p, &ref); err != nil {
diff --git a/skate/reduce.go b/skate/reduce.go
index 1100e2f..c68b2dd 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -17,6 +17,21 @@
// * [ ] switch to faster logging, e.g. zerolog, https://github.com/rs/zerolog#benchmarks
// * [x] batch, parallelize
// * [ ] unify flags to "-a", "-b"
+//
+// A couple more ideas for improvement:
+//
+// * each reducer could be its own type instead of a function in order to allow
+// for customizations, options, e.g.
+//
+// type ReduceExact struct {
+// ReleasesReader io.Reader
+// RefsReader io.Reader
+// W io.Writer
+// MatchResult MatchResult
+// }
+//
+// func (r *ReduceExact) Run() error { ... }
+//
package skate
import (
diff --git a/skate/verify.go b/skate/verify.go
index 5df0a7e..c5fbeff 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -112,8 +112,7 @@ type MatchResult struct {
Reason Reason
}
-// VerificationPair groups two identifiers and their match status and
-// match reason.
+// MatchPair groups two identifiers and their match status and match reason.
type MatchPair struct {
A string
B string
@@ -135,7 +134,9 @@ func JsonMarshalNewline(v interface{}) ([]byte, error) {
return b, nil
}
-// Verify verifies two releases and will ignore short titles.
+// Verify verifies two releases and will ignore short titles. TODO: We want
+// this to be a type with configuration options and maybe some pluggable
+// pieces, e.g. for author matching, etc.
func Verify(a, b *Release) MatchResult {
return VerifyMinTitleLength(a, b, 5)
}
diff --git a/skate/xio/util.go b/skate/xio/util.go
index 10225a1..4416c9f 100644
--- a/skate/xio/util.go
+++ b/skate/xio/util.go
@@ -9,13 +9,13 @@ import (
"sync"
)
-// SyncWriter makes any writer thread safe, using a lock.
+// SyncWriter makes any writer thread-safe using a lock.
type SyncWriter struct {
sync.Mutex
w io.Writer
}
-// NewSyncWriter returns an io.Writer that can be safely accessed by multiple
+// NewSyncWriter returns an io.Writer that can be safely written to by multiple
// goroutines.
func NewSyncWriter(w io.Writer) *SyncWriter {
return &SyncWriter{w: w}