// Provenance: taken from commit 5d5b828730fdf34dcd2a6aeba64c7df2c1be23c5
// ("copy in scalding learning example", Bryan Newbold, 2018-05-21), which
// created scald-mvp/src/main/scala/example/WordCount.scala.

package example

import com.twitter.scalding._

/**
 * Local driver for the [[WordCountJob]] class: runs the job with a fixed set
 * of arguments and then echoes the job's output file to standard output.
 */
object WordCountJob {

  /** Path passed to the job as `--input`. */
  private val InputPath = "dummy.txt"

  /** Path passed to the job as `--output`, and read back once the job is done. */
  private val OutputPath = "dummy-out.txt"

  /**
   * Runs the word count in `--local` mode and prints every line of the result.
   *
   * @param args command-line arguments; ignored, the job always runs with the
   *             fixed arguments built below
   * @throws RuntimeException if the job reports that it did not succeed
   */
  def main(args: Array[String]): Unit = {
    val jobArgs = Args(List("--local", "", "--input", InputPath, "--output", OutputPath))

    // Do not go on to print the output file when the job failed: a file left
    // over from an earlier run would otherwise be shown as if it were fresh.
    val succeeded = new WordCountJob(jobArgs).run
    if (!succeeded) sys.error(s"WordCountJob did not complete successfully; not reading $OutputPath")

    // Close the file handle even if reading or printing throws.
    val source = scala.io.Source.fromFile(OutputPath)
    try source.getLines().foreach(println)
    finally source.close()
  }
}

/**
 * Scalding job that counts how often each whitespace-separated token occurs
 * in the text file named by `--input` and writes the (token, count) pairs to
 * `--output` through `TypedTsv`.
 *
 * @param args job arguments; must provide `input` and `output`
 */
class WordCountJob(args: Args) extends Job(args) {
  TypedPipe.from(TextLine(args("input")))
    // split yields an empty first token for blank lines and for lines that
    // start with whitespace; drop those so "" is never counted as a word.
    .flatMap { line => line.split("""\s+""").filter(_.nonEmpty) }
    .groupBy { word => word }
    .size
    .write(TypedTsv(args("output")))
}