diff options
Diffstat (limited to 'skate/cmd/skate-conv/main.go')
-rw-r--r-- | skate/cmd/skate-conv/main.go | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go
index d48c913..3627c67 100644
--- a/skate/cmd/skate-conv/main.go
+++ b/skate/cmd/skate-conv/main.go
@@ -19,11 +19,17 @@ import (
 )
 
 var (
-	numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
-	batchSize  = flag.Int("b", 100000, "batch size")
-	fromFormat = flag.String("f", "ref", "import schema: ref, rg, ol, oled")
+	numWorkers                    = flag.Int("w", runtime.NumCPU(), "number of workers")
+	batchSize                     = flag.Int("b", 100000, "batch size")
+	fromFormat                    = flag.String("f", "ref", "import schema: ref, rg, ol, oled")
+	extraOpenLibraryAuthorMapping = flag.String("Xa", "", "TSV file, mapping OL author keys (e.g. to plain text names")
 
 	f func([]byte) ([]byte, error) // our converter function
+
+	// map OL author key to author name, e.g. via: zstdcat -T0
+	// ol_dump_authors_latest.txt.zst | cut -f 5 | jq -rc '[.key, .name] |
+	// @tsv'
+	openLibraryAuthorMap = make(map[string]string)
 )
 
 func main() {
@@ -37,6 +43,18 @@ func main() {
 		f = openLibraryToRelease
 	case "oled":
 		f = openLibraryEditionToRelease
+		if *extraOpenLibraryAuthorMapping != "" {
+			f, err := os.Open(*extraOpenLibraryAuthorMapping)
+			if err != nil {
+				log.Fatal(err)
+			}
+			defer f.Close()
+			m, err := skate.TabsToMap(f, "\t", 1, 2)
+			if err != nil {
+				log.Fatal(err)
+			}
+			openLibraryAuthorMap = m
+		}
 	default:
 		log.Fatalf("unsupported input schema: %v", *fromFormat)
 	}
@@ -118,7 +136,7 @@ func openLibraryEditionToRelease(p []byte) ([]byte, error) {
 	if err = json.Unmarshal(p, &w); err != nil {
 		return nil, err
 	}
-	if release, err = skate.OpenLibraryEditionToRelease(&w); err != nil {
+	if release, err = skate.OpenLibraryEditionToRelease(&w, openLibraryAuthorMap); err != nil {
 		return nil, err
 	}
 	release.Extra.Skate.Status = "oled"