// skate-map runs a given map function over input data. We mostly want to // extract a key from a json document. package main import ( "flag" "fmt" "log" "os" "runtime" "text/tabwriter" "git.archive.org/martin/cgraph/skate" "git.archive.org/martin/cgraph/skate/parallel" ) var ( mapperName = flag.String("m", "", "mapper to run") numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers") batchSize = flag.Int("b", 50000, "batch size") verbose = flag.Bool("verbose", false, "show progress") extraValue = flag.String("x", "", "extra value to pass to configurable mappers") ) func main() { flag.Parse() // TODO // [ ] add prefixes and a way to derive multiple keys in one go // [ ] how to store multiple keys, sorted? // [ ] maybe wrap jq and parallel for arbitrary nested keys availableMappers := map[string]skate.Mapper{ "id": skate.Identity, "ff": skate.WithPrefix(skate.CreateFixedMapper(*extraValue), "ff"), "ti": skate.WithPrefix(skate.MapperTitle, "ti"), "tn": skate.WithPrefix(skate.MapperTitleNormalized, "tn"), "ty": skate.WithPrefix(skate.MapperTitleNysiis, "ty"), "ts": skate.WithPrefix(skate.MapperTitleSandcrawler, "ts"), } switch { case *mapperName != "": if f, ok := availableMappers[*mapperName]; !ok { log.Fatalf("unknown mapper name: %v", *mapperName) } else { pp := parallel.NewProcessor(os.Stdin, os.Stdout, f.AsTSV) pp.NumWorkers = *numWorkers pp.BatchSize = *batchSize pp.Verbose = *verbose if err := pp.Run(); err != nil { log.Fatal(err) } } default: fmt.Println("skate-map available mappers") fmt.Println() w := tabwriter.NewWriter(os.Stdout, 0, 0, 4, ' ', 0) for k, v := range availableMappers { fmt.Fprintf(w, "%s\t%s\n", k, skate.NameOf(v)) } w.Flush() } }