about | summary | refs | log | tree | commit | diff | stats
path: root/scalding
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-07-17 18:52:58 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2018-07-17 18:56:10 -0700
commit: bc10143c54c02d5e6a806f1d5f7bb326c24793f3 (patch)
tree: db1f03eef42b89faf9bcb30c5be08a8e92ef758f /scalding
parent: f6f2fcafb245101f75c9cda175891ef2391cda97 (diff)
download: sandcrawler-bc10143c54c02d5e6a806f1d5f7bb326c24793f3.tar.gz
download: sandcrawler-bc10143c54c02d5e6a806f1d5f7bb326c24793f3.zip
add buildSink() method for writing to HBase
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/main/scala/sandcrawler/HBaseBuilder.scala | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
index b271def..fd04f2e 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
@@ -1,6 +1,8 @@
package sandcrawler
import cascading.tuple.Fields
+import parallelai.spyglass.base.JobBase
+import cascading.tap.SinkMode
import parallelai.spyglass.hbase.HBaseConstants.SourceMode
import parallelai.spyglass.hbase.HBaseSource
import scala._
@@ -48,4 +50,9 @@ object HBaseBuilder {
val (families, fields) = parseColSpecs(colSpecs)
new HBaseSource(table, server, new Fields("key"), families, fields, sourceMode = sourceMode, keyList = keyList)
}
+
+ def buildSink(table: String, server: String, colSpecs: List[String], sinkMode: SinkMode, keyList: List[String] = List("key")) : HBaseSource = { // Builds an HBaseSource for writing to `table`; NOTE(review): keyList is never referenced in this body — confirm whether it is needed
+ val (families, fields) = parseColSpecs(colSpecs) // parseColSpecs yields the column families and their fields for colSpecs
+ new HBaseSource(table, server, new Fields("key"), families, fields, sinkMode = sinkMode) // row-key field is fixed to "key"; sinkMode is the only argument passed by name
+ }
}