aboutsummaryrefslogtreecommitdiffstats
path: root/scald-mvp/src
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-05-21 10:55:22 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-05-21 10:55:22 -0700
commit 5d5b828730fdf34dcd2a6aeba64c7df2c1be23c5 (patch)
tree 5f7322b2dc25317ab847d98e182ce801a1a5eae6 /scald-mvp/src
parent b18c68c81b4c426b5d83f2e6c31026b9febcb6e0 (diff)
download sandcrawler-5d5b828730fdf34dcd2a6aeba64c7df2c1be23c5.tar.gz
download sandcrawler-5d5b828730fdf34dcd2a6aeba64c7df2c1be23c5.zip
copy in scalding learning example
Diffstat (limited to 'scald-mvp/src')
-rw-r--r-- scald-mvp/src/main/scala/example/WordCount.scala | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/scald-mvp/src/main/scala/example/WordCount.scala b/scald-mvp/src/main/scala/example/WordCount.scala
new file mode 100644
index 0000000..f7230c1
--- /dev/null
+++ b/scald-mvp/src/main/scala/example/WordCount.scala
@@ -0,0 +1,23 @@
+
+package example
+
+import com.twitter.scalding._
+
+/** Local-mode driver: runs the job on dummy.txt, then prints dummy-out.txt line by line. */
+object WordCountJob {
+  def main(args: Array[String]): Unit = {
+    val jobArgs = Args(List("--local", "", "--input", "dummy.txt", "--output", "dummy-out.txt"))
+    new WordCountJob(jobArgs).run
+    val results = scala.io.Source.fromFile("dummy-out.txt")
+    try results.getLines().foreach(println) // echo the job output to stdout
+    finally results.close() // release the file handle even if printing throws
+  }
+}
+
+/** Counts each whitespace-separated token in --input; writes token/count TSV rows to --output. */
+class WordCountJob(args: Args) extends Job(args) {
+  TypedPipe.from(TextLine(args("input")))
+    .flatMap { line => line.split("""\s+""").filter(_.nonEmpty) } // split yields "" on blank/indented lines
+    .groupBy(identity).size
+    .write(TypedTsv(args("output")))
+}