about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
Diffstat (limited to 'skate')
-rw-r--r-- skate/cmd/skate-conv/main.go 7
-rw-r--r-- skate/reduce.go 14
-rw-r--r-- skate/xio/util.go 42
3 files changed, 30 insertions, 33 deletions
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go
index 423c394..972a7d0 100644
--- a/skate/cmd/skate-conv/main.go
+++ b/skate/cmd/skate-conv/main.go
@@ -47,7 +47,12 @@ func main() {
f = openLibraryEditionToRelease
if *extraOpenLibraryAuthorMapping != "" {
log.Printf("loading author mapping from %v ...", *extraOpenLibraryAuthorMapping)
- m, err := xio.TabsToMapFile(*extraOpenLibraryAuthorMapping, "\t", 1, 2)
+ f, err := os.Open(*extraOpenLibraryAuthorMapping)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer f.Close()
+ m, err := xio.MapFromTabular(f, "\t", 1, 2)
if err != nil {
log.Fatal(err)
}
diff --git a/skate/reduce.go b/skate/reduce.go
index c96d17e..e2fa130 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -49,7 +49,7 @@ func groupLogf(g *zipkey.Group, s string, vs ...interface{}) {
// match result, e.g. for doi matches.
func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
var (
@@ -101,7 +101,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
// match result, e.g. used with release entities converted from open library snapshots.
func ZippyExactReleases(olr, releases io.Reader, matchResult MatchResult, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
var (
@@ -151,7 +151,7 @@ func ZippyExactReleases(olr, releases io.Reader, matchResult MatchResult, w io.W
// fixed match result.
func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
// TODO: Use slug version of title. Also consider a generic schema
// (e.g. one that would look similar for OL, WB, WP, ...)
b32enc = base32.StdEncoding.WithPadding(base32.NoPadding)
@@ -216,7 +216,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error
// match.
func ZippyVerifyRefs(releases, refs io.Reader, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
var (
@@ -303,7 +303,7 @@ func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error {
// release) and writes biblioref.
func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
// TODO: For openlibrary and wayback matches, pass through either
@@ -364,7 +364,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
// write a bref document for each match.
func ZippyWayback(refs, cdx io.Reader, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
var (
@@ -438,7 +438,7 @@ func ZippyWayback(refs, cdx io.Reader, w io.Writer) error {
func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
var (
stats = statsAugment{}
- enc = json.NewEncoder(xio.NewSingleWriter(w))
+ enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
// g.G0 contains matched docs for a given work id, g.G1 all raw
diff --git a/skate/xio/util.go b/skate/xio/util.go
index 49f38a3..90a1012 100644
--- a/skate/xio/util.go
+++ b/skate/xio/util.go
@@ -9,23 +9,24 @@ import (
"sync"
)
-// SingleWriter makes any writer thread safe.
-type SingleWriter struct {
+// SyncWriter makes any writer thread safe.
+type SyncWriter struct {
sync.Mutex
w io.Writer
}
-// NewSingleWriter returns an io.Writer that can be safely accessed by multiple
+// NewSyncWriter returns an io.Writer that can be safely accessed by multiple
// goroutines.
-func NewSingleWriter(w io.Writer) *SingleWriter {
- return &SingleWriter{w: w}
+func NewSyncWriter(w io.Writer) *SyncWriter {
+ return &SyncWriter{w: w}
}
// Write wraps the underlying writer and gives exclusive access.
-func (w *SingleWriter) Write(p []byte) (n int, err error) {
+func (w *SyncWriter) Write(p []byte) (n int, err error) {
w.Lock()
- defer w.Unlock()
- return w.w.Write(p)
+ n, err = w.w.Write(p)
+ w.Unlock()
+ return
}
// OpenTwo opens two files. The caller needs to check for a single error only.
@@ -39,19 +40,9 @@ func OpenTwo(f0, f1 string) (g0, g1 *os.File, err error) {
return g0, g1, nil
}
-// TabsToMapFile turns two columns from a tabular file into a map.
-func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, error) {
- f, err := os.Open(filename)
- if err != nil {
- return nil, err
- }
- defer f.Close()
- return TabsToMap(f, sep, kCol, vCol)
-}
-
-// TabsToMap read from a reader and turns values from kCol, vCol columns
-// (1-indexed) into a map.
-func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, error) {
+// MapFromTabular reads from a reader and turns values from keyC, valueC
+// columns, both 1-indexed, into a map.
+func MapFromTabular(r io.Reader, sep string, keyC, valueC int) (map[string]string, error) {
var (
br = bufio.NewReader(r)
m = make(map[string]string)
@@ -68,12 +59,13 @@ func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, erro
return nil, err
}
fields = strings.Split(line, sep)
- if len(fields) > kCol-1 && len(fields) > vCol-1 {
- k = strings.TrimSpace(fields[kCol-1])
- v = strings.TrimSpace(fields[vCol-1])
+ if len(fields) > keyC-1 && len(fields) > valueC-1 {
+ k = strings.TrimSpace(fields[keyC-1])
+ v = strings.TrimSpace(fields[valueC-1])
m[k] = v
} else {
- return nil, fmt.Errorf("invalid line: %v (%v fields, %v, %v)", line, len(fields), kCol, vCol)
+ return nil, fmt.Errorf("invalid line: %v (%v fields, %v, %v)",
+ line, len(fields), keyC, valueC)
}
}
return m, nil