diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-07-17 18:52:58 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-07-19 15:50:26 -0700 |
commit | 500525b82244151ed3e64d1cf31e96df394b5250 (patch) | |
tree | af367dd5c232f7bd93469af77672dde4ae3fba01 /scalding/src | |
parent | 1f60ff01871beef73ef4ac710b5b48106b6cdefc (diff) | |
download | sandcrawler-500525b82244151ed3e64d1cf31e96df394b5250.tar.gz sandcrawler-500525b82244151ed3e64d1cf31e96df394b5250.zip |
add buildSink() method for writing to HBase
Diffstat (limited to 'scalding/src')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseBuilder.scala | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala index b271def..fd04f2e 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala @@ -1,6 +1,8 @@ package sandcrawler import cascading.tuple.Fields +import parallelai.spyglass.base.JobBase +import cascading.tap.SinkMode import parallelai.spyglass.hbase.HBaseConstants.SourceMode import parallelai.spyglass.hbase.HBaseSource import scala._ @@ -48,4 +50,9 @@ object HBaseBuilder { val (families, fields) = parseColSpecs(colSpecs) new HBaseSource(table, server, new Fields("key"), families, fields, sourceMode = sourceMode, keyList = keyList) } + + def buildSink(table: String, server: String, colSpecs: List[String], sinkMode: SinkMode, keyList: List[String] = List("key")) : HBaseSource = { + val (families, fields) = parseColSpecs(colSpecs) + new HBaseSource(table, server, new Fields("key"), families, fields, sinkMode = sinkMode) + } } |