From b4f423438e9726d9c5efdb295552e8e737e2ca8f Mon Sep 17 00:00:00 2001 From: Saad Rashid Date: Thu, 20 Feb 2014 16:21:06 +0000 Subject: Update Scalding and Cascading latest release and pom versions. Now SpyGlass supports scalding (0.9.0rc4) and cascading (2.2.1). Its a release candidate. --- .../hbase/example/SimpleHBaseSourceExample.scala | 53 +++++++++++++++++----- .../hbase/example/SimpleHBaseSourceRunner.scala | 26 +++++++++++ 2 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceRunner.scala (limited to 'src/main/scala/parallelai/spyglass/hbase/example') diff --git a/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceExample.scala b/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceExample.scala index 7ba2788..5d844cb 100644 --- a/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceExample.scala +++ b/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceExample.scala @@ -6,27 +6,56 @@ import org.apache.log4j.{Level, Logger} import parallelai.spyglass.hbase.{HBasePipeConversions, HBaseSource} import parallelai.spyglass.hbase.HBaseConstants.SourceMode import cascading.tuple.Fields +import com.twitter.scalding.IterableSource /** * Simple example of HBaseSource usage */ class SimpleHBaseSourceExample(args: Args) extends JobBase(args) with HBasePipeConversions { - val isDebug: Boolean = args("debug").toBoolean - - if (isDebug) Logger.getRootLogger.setLevel(Level.DEBUG) + //val isDebug: Boolean = args("debug").toBoolean + //if (isDebug) Logger.getRootLogger.setLevel(Level.DEBUG) val output = args("output") - val hbs = new HBaseSource( - "table_name", - "quorum_name:2181", + val hbsOut = new HBaseSource( + "spyglass.hbase.test", + "cldmgr.prod.bigdata.bskyb.com:2181", new Fields("key"), - List("column_family"), - List(new Fields("column_name1", "column_name2")), - sourceMode = SourceMode.GET_LIST, keyList = List("1", "2", "3")) - .read - .fromBytesWritable(new Fields("key", "column_name1", "column_name2")) - .write(Tsv(output format "get_list")) + List("data", "data"), + List(new Fields("test1", "test2"))) + + val data = List( + ("100", 1, "A"), + ("101", 2, "B"), + ("102" , 3 , "C"), + ("103" , 4 , "D"), + ("104" , 5 , "E"), + ("104" , 6 , "F")) + + val testDataPipe = + IterableSource[(String, Int, String)](data, ('key, 'test1, 'test2)) + .debug + .toBytesWritable(List('key, 'test1, 'test2)) + + val writer = testDataPipe + writer.write(hbsOut) + + val hbs = new HBaseSource( + "spyglass.hbase.test", + "cldmgr.prod.bigdata.bskyb.com:2181", + new Fields("key"), + List("data", "data"), + List(new Fields("test1", "test2")), + sourceMode = SourceMode.SCAN_ALL) + .read + .fromBytesWritable(new Fields("key", "test1", "test2")) + + val fileWriter = hbs + fileWriter.write(Tsv("scan_all.txt")) + + + + } diff --git a/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceRunner.scala b/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceRunner.scala new file mode 100644 index 0000000..bbcf96d --- /dev/null +++ b/src/main/scala/parallelai/spyglass/hbase/example/SimpleHBaseSourceRunner.scala @@ -0,0 +1,26 @@ +package parallelai.spyglass.hbase.example + +import com.twitter.scalding.Args +import org.slf4j.LoggerFactory +import parallelai.spyglass.base.JobRunner + +object SimpleHBaseSourceRunner extends App { + + val mArgs = Args(args) + + val log = LoggerFactory.getLogger(this.getClass.getName) + + + + log.info("Starting HBaseSource Import Process Test...") + + val start1 = System.currentTimeMillis + + JobRunner.main((classOf[SimpleHBaseSourceExample].getName :: mArgs.toList).toArray) + + val end = System.currentTimeMillis + + log.info("HBaseSource Import process finished successfully.") + log.info("HBaseSource Import process : " + (end - start1) + " milliseconds to complete") + +} \ No newline at end of file -- cgit v1.2.3