diff options
-rw-r--r-- | skate/cmd/skate-conv/main.go | 7 | ||||
-rw-r--r-- | skate/reduce.go | 14 | ||||
-rw-r--r-- | skate/xio/util.go | 42 |
3 files changed, 30 insertions, 33 deletions
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go index 423c394..972a7d0 100644 --- a/skate/cmd/skate-conv/main.go +++ b/skate/cmd/skate-conv/main.go @@ -47,7 +47,12 @@ func main() { f = openLibraryEditionToRelease if *extraOpenLibraryAuthorMapping != "" { log.Printf("loading author mapping from %v ...", *extraOpenLibraryAuthorMapping) - m, err := xio.TabsToMapFile(*extraOpenLibraryAuthorMapping, "\t", 1, 2) + f, err := os.Open(*extraOpenLibraryAuthorMapping) + if err != nil { + log.Fatal(err) + } + defer f.Close() + m, err := xio.MapFromTabular(f, "\t", 1, 2) if err != nil { log.Fatal(err) } diff --git a/skate/reduce.go b/skate/reduce.go index c96d17e..e2fa130 100644 --- a/skate/reduce.go +++ b/skate/reduce.go @@ -49,7 +49,7 @@ func groupLogf(g *zipkey.Group, s string, vs ...interface{}) { // match result, e.g. for doi matches. func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { var ( @@ -101,7 +101,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) // match result, e.g. used with release entities converted from open library snapshots. func ZippyExactReleases(olr, releases io.Reader, matchResult MatchResult, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { var ( @@ -151,7 +151,7 @@ func ZippyExactReleases(olr, releases io.Reader, matchResult MatchResult, w io.W // fixed match result. func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) // TODO: Use slug version of title. Also consider a generic schema // (e.g. one that would look similar for OL, WB, WP, ...) b32enc = base32.StdEncoding.WithPadding(base32.NoPadding) @@ -216,7 +216,7 @@ func ZippyExactWiki(releases, wiki io.Reader, mr MatchResult, w io.Writer) error // match. func ZippyVerifyRefs(releases, refs io.Reader, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { var ( @@ -303,7 +303,7 @@ func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error { // release) and writes biblioref. func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { // TODO: For openlibrary and wayback matches, pass through either @@ -364,7 +364,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { // write a bref document for each match. func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { var ( - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { var ( @@ -438,7 +438,7 @@ func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error { var ( stats = statsAugment{} - enc = json.NewEncoder(xio.NewSingleWriter(w)) + enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { // g.G0 contains matched docs for a given work id, g.G1 all raw diff --git a/skate/xio/util.go b/skate/xio/util.go index 49f38a3..90a1012 100644 --- a/skate/xio/util.go +++ b/skate/xio/util.go @@ -9,23 +9,24 @@ import ( "sync" ) -// SingleWriter makes any writer thread safe. -type SingleWriter struct { +// SyncWriter makes any writer thread safe. +type SyncWriter struct { sync.Mutex w io.Writer } -// NewSingleWriter returns an io.Writer that can be safely accessed by multiple +// NewSyncWriter returns an io.Writer that can be safely accessed by multiple // goroutines. -func NewSingleWriter(w io.Writer) *SingleWriter { - return &SingleWriter{w: w} +func NewSyncWriter(w io.Writer) *SyncWriter { + return &SyncWriter{w: w} } // Write wraps the underlying writer and gives exclusive access. -func (w *SingleWriter) Write(p []byte) (n int, err error) { +func (w *SyncWriter) Write(p []byte) (n int, err error) { w.Lock() - defer w.Unlock() - return w.w.Write(p) + n, err = w.w.Write(p) + w.Unlock() + return } // OpenTwo opens two files. The caller needs to check for a single error only. @@ -39,19 +40,9 @@ func OpenTwo(f0, f1 string) (g0, g1 *os.File, err error) { return g0, g1, nil } -// TabsToMapFile turns two columns from a tabular file into a map. -func TabsToMapFile(filename, sep string, kCol, vCol int) (map[string]string, error) { - f, err := os.Open(filename) - if err != nil { - return nil, err - } - defer f.Close() - return TabsToMap(f, sep, kCol, vCol) -} - -// TabsToMap read from a reader and turns values from kCol, vCol columns -// (1-indexed) into a map. -func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, error) { +// MapFromTabular reads from a reader and turns values from keyC, valueC +// columns, both 1-indexed, into a map. +func MapFromTabular(r io.Reader, sep string, keyC, valueC int) (map[string]string, error) { var ( br = bufio.NewReader(r) m = make(map[string]string) @@ -68,12 +59,13 @@ func TabsToMap(r io.Reader, sep string, kCol, vCol int) (map[string]string, erro return nil, err } fields = strings.Split(line, sep) - if len(fields) > kCol-1 && len(fields) > vCol-1 { - k = strings.TrimSpace(fields[kCol-1]) - v = strings.TrimSpace(fields[vCol-1]) + if len(fields) > keyC-1 && len(fields) > valueC-1 { + k = strings.TrimSpace(fields[keyC-1]) + v = strings.TrimSpace(fields[valueC-1]) m[k] = v } else { - return nil, fmt.Errorf("invalid line: %v (%v fields, %v, %v)", line, len(fields), kCol, vCol) + return nil, fmt.Errorf("invalid line: %v (%v fields, %v, %v)", + line, len(fields), keyC, valueC) } } return m, nil |