From e7f8c117c3586c6387a744043ad68c1850471719 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Sat, 24 Apr 2021 02:29:10 +0200
Subject: wip: move to prefixed, w/ skate-map

---
 skate/cmd/skate-map/main.go | 74 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 skate/cmd/skate-map/main.go

(limited to 'skate/cmd/skate-map/main.go')

diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
new file mode 100644
index 0000000..c5fb798
--- /dev/null
+++ b/skate/cmd/skate-map/main.go
@@ -0,0 +1,74 @@
+// skate-map runs a given map function over input data. We mostly want to
+// extract a key from a json document.
+//
+// The mapper is selected by name with -m; input is read from stdin and output
+// is written to stdout. Without -m, the available mappers are listed.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"runtime"
+	"sort"
+	"text/tabwriter"
+
+	"git.archive.org/martin/cgraph/skate"
+	"git.archive.org/martin/cgraph/skate/parallel"
+)
+
+// Command line flags.
+var (
+	mapperName = flag.String("m", "", "mapper to run")
+	numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
+	batchSize  = flag.Int("b", 50000, "batch size")
+	verbose    = flag.Bool("verbose", false, "show progress")
+	extraValue = flag.String("x", "", "extra value to pass to configurable mappers")
+)
+
+// main parses flags, then either runs the mapper selected via -m over stdin
+// (writing to stdout) or, if no mapper name was given, lists the available
+// mappers.
+func main() {
+	flag.Parse()
+	// XXX: introduce prefixes
+	availableMappers := map[string]skate.Mapper{
+		"id":    skate.Identity,
+		"ff":    skate.CreateFixedFieldFunc(*extraValue),
+		"title": skate.MapperTitle,
+		"tnorm": skate.MapperTitleNormalized,
+		"tnysi": skate.MapperTitleNysiis,
+		"tsand": skate.MapperTitleSandcrawler,
+	}
+	if *mapperName == "" {
+		// Map iteration order is random; sort the names so the listing is stable.
+		names := make([]string, 0, len(availableMappers))
+		for name := range availableMappers {
+			names = append(names, name)
+		}
+		sort.Strings(names)
+		fmt.Println("skate-map available mappers")
+		fmt.Println()
+		w := tabwriter.NewWriter(os.Stdout, 0, 0, 4, ' ', 0)
+		for _, name := range names {
+			fmt.Fprintf(w, "%s\t%s\n", name, skate.NameOf(availableMappers[name]))
+		}
+		if err := w.Flush(); err != nil {
+			log.Fatal(err)
+		}
+		return
+	}
+	f, ok := availableMappers[*mapperName]
+	if !ok {
+		// Fatalf rather than Fatal, so that the %v verb is actually expanded.
+		log.Fatalf("unknown mapper name: %v", *mapperName)
+	}
+	pp := parallel.NewProcessor(os.Stdin, os.Stdout, f)
+	pp.NumWorkers = *numWorkers
+	pp.BatchSize = *batchSize
+	pp.Verbose = *verbose
+	if err := pp.Run(); err != nil {
+		log.Fatal(err)
+	}
+}
-- 
cgit v1.2.3