diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-05-26 22:47:05 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-05-26 22:47:05 +0200 |
commit | fff30b71abf23222f1dbc7e45591cdd093bf85d1 (patch) | |
tree | a38e8856afc0e46fab0a3067ab3141969b13a516 /skate/cmd | |
parent | 57826605209de687e0b6e6cb151021b7bcf034ca (diff) | |
download | refcat-fff30b71abf23222f1dbc7e45591cdd093bf85d1.tar.gz refcat-fff30b71abf23222f1dbc7e45591cdd093bf85d1.zip |
add author key mapping
Diffstat (limited to 'skate/cmd')
-rw-r--r-- | skate/cmd/skate-conv/main.go | 26 |
1 files changed, 22 insertions, 4 deletions
```diff
diff --git a/skate/cmd/skate-conv/main.go b/skate/cmd/skate-conv/main.go
index d48c913..3627c67 100644
--- a/skate/cmd/skate-conv/main.go
+++ b/skate/cmd/skate-conv/main.go
@@ -19,11 +19,17 @@ import (
 )
 
 var (
-	numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
-	batchSize  = flag.Int("b", 100000, "batch size")
-	fromFormat = flag.String("f", "ref", "import schema: ref, rg, ol, oled")
+	numWorkers                    = flag.Int("w", runtime.NumCPU(), "number of workers")
+	batchSize                     = flag.Int("b", 100000, "batch size")
+	fromFormat                    = flag.String("f", "ref", "import schema: ref, rg, ol, oled")
+	extraOpenLibraryAuthorMapping = flag.String("Xa", "", "TSV file, mapping OL author keys (e.g. to plain text names")
 
 	f func([]byte) ([]byte, error) // our converter function
+
+	// map OL author key to author name, e.g. via: zstdcat -T0
+	// ol_dump_authors_latest.txt.zst | cut -f 5 | jq -rc '[.key, .name] |
+	// @tsv'
+	openLibraryAuthorMap = make(map[string]string)
 )
 
 func main() {
@@ -37,6 +43,18 @@ func main() {
 		f = openLibraryToRelease
 	case "oled":
 		f = openLibraryEditionToRelease
+		if *extraOpenLibraryAuthorMapping != "" {
+			f, err := os.Open(*extraOpenLibraryAuthorMapping)
+			if err != nil {
+				log.Fatal(err)
+			}
+			defer f.Close()
+			m, err := skate.TabsToMap(f, "\t", 1, 2)
+			if err != nil {
+				log.Fatal(err)
+			}
+			openLibraryAuthorMap = m
+		}
 	default:
 		log.Fatalf("unsupported input schema: %v", *fromFormat)
 	}
@@ -118,7 +136,7 @@ func openLibraryEditionToRelease(p []byte) ([]byte, error) {
 	if err = json.Unmarshal(p, &w); err != nil {
 		return nil, err
 	}
-	if release, err = skate.OpenLibraryEditionToRelease(&w); err != nil {
+	if release, err = skate.OpenLibraryEditionToRelease(&w, openLibraryAuthorMap); err != nil {
 		return nil, err
 	}
 	release.Extra.Skate.Status = "oled"
```