aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2018-07-17 18:52:58 -0700
committerEllen Spertus <ellen.spertus@gmail.com>2018-07-19 15:50:26 -0700
commit500525b82244151ed3e64d1cf31e96df394b5250 (patch)
treeaf367dd5c232f7bd93469af77672dde4ae3fba01 /scalding/src/main
parent1f60ff01871beef73ef4ac710b5b48106b6cdefc (diff)
downloadsandcrawler-500525b82244151ed3e64d1cf31e96df394b5250.tar.gz
sandcrawler-500525b82244151ed3e64d1cf31e96df394b5250.zip
add buildSink() method for writing to HBase
Diffstat (limited to 'scalding/src/main')
-rw-r--r--scalding/src/main/scala/sandcrawler/HBaseBuilder.scala7
1 files changed, 7 insertions, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
index b271def..fd04f2e 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
@@ -1,6 +1,8 @@
package sandcrawler
import cascading.tuple.Fields
+import parallelai.spyglass.base.JobBase
+import cascading.tap.SinkMode
import parallelai.spyglass.hbase.HBaseConstants.SourceMode
import parallelai.spyglass.hbase.HBaseSource
import scala._
@@ -48,4 +50,9 @@ object HBaseBuilder {
val (families, fields) = parseColSpecs(colSpecs)
new HBaseSource(table, server, new Fields("key"), families, fields, sourceMode = sourceMode, keyList = keyList)
}
+
+ def buildSink(table: String, server: String, colSpecs: List[String], sinkMode: SinkMode, keyList: List[String] = List("key")) : HBaseSource = {
+ val (families, fields) = parseColSpecs(colSpecs)
+ new HBaseSource(table, server, new Fields("key"), families, fields, sinkMode = sinkMode)
+ }
}