about summary refs log tree commit diff stats
path: root/scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala 18
1 files changed, 12 insertions, 6 deletions
diff --git a/scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala
index 819a652..a6ab53a 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseMimeCountJob.scala
@@ -9,9 +9,11 @@ import parallelai.spyglass.hbase.{HBaseSource, HBasePipeConversions}
import parallelai.spyglass.hbase.HBaseConstants.SourceMode
class HBaseMimeCountJob(args: Args) extends JobBase(args) with HBasePipeConversions {
+
val output = args("output")
- HBaseMimeCountJob.getHBaseSource
+ HBaseMimeCountJob.getHBaseSource(args("hbase-table"),
+ args("zookeeper-hosts"))
.read
.fromBytesWritable(List('mime))
.debug
@@ -20,9 +22,13 @@ class HBaseMimeCountJob(args: Args) extends JobBase(args) with HBasePipeConversi
}
object HBaseMimeCountJob {
- def getHBaseSource = HBaseBuilder.build(
- "wbgrp-journal-extract-0-qa", // HBase Table Name
- "mtrcs-zk1.us.archive.org:2181", // HBase Zookeeper server (to get runtime config info; can be array?)
- List("file:mime"),
- SourceMode.SCAN_ALL)
+
+ def getHBaseSource(hbase_table: String, zookeeper_hosts: String) : HBaseSource = {
+ return HBaseBuilder.build(
+ hbase_table,
+ zookeeper_hosts,
+ List("file:mime"),
+ SourceMode.SCAN_ALL)
+ }
+
}