diff options
| -rw-r--r-- | README.md | 205 | 
1 file changed, 106 insertions, 99 deletions
@@ -3,107 +3,114 @@ SpyGlass  Cascading and Scalding wrapper for HBase with advanced read features +Building +======== -Example -======= - -package parallelai.spyglass.hbase.example - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.hbase.HBaseConfiguration -import org.apache.hadoop.hbase.client.HConnectionManager -import org.apache.hadoop.hbase.client.HTable -import org.apache.hadoop.hbase.util.Bytes -import org.apache.log4j.Level -import org.apache.log4j.Logger - -import com.twitter.scalding._ -import com.twitter.scalding.Args - -import parallelai.spyglass.base.JobBase -import parallelai.spyglass.hbase.HBaseSource -import parallelai.spyglass.hbase.HBaseConstants.SourceMode - -class HBaseExample(args: Args) extends JobBase(args) { - -  val isDebug: Boolean = args("debug").toBoolean - -  if (isDebug) Logger.getRootLogger().setLevel(Level.DEBUG) - -  val output = args("output") - -  println(output) - -  val jobConf = getJobConf - -  val quorumNames = "cldmgr.prod.bigdata.bskyb.com:2181" - -  case class HBaseTableStore( -      conf: Configuration, -      quorum: String, -      tableName: String) { - -    val tableBytes = Bytes.toBytes(tableName) -    val connection = HConnectionManager.getConnection(conf) -    val maxThreads = conf.getInt("hbase.htable.threads.max", 1) - -    conf.set("hbase.zookeeper.quorum", quorumNames); - -    val htable = new HTable(HBaseConfiguration.create(conf), tableName) - -  } - -  val hTableStore = HBaseTableStore(getJobConf, quorumNames, "skybet.test.tbet") - -  val hbs2 = new HBaseSource( -    "table_name", -    "quorum_name:2181", -    'key, -    Array("column_family"), -    Array('column_name), -    sourceMode = SourceMode.GET_LIST, keyList = List("5003914", "5000687", "5004897")) -    .read -    .write(Tsv(output.format("get_list"))) - -  val hbs3 = new HBaseSource( -    "table_name", -    "quorum_name:2181", -    'key, -    Array("column_family"), -    Array('column_name), -    sourceMode = SourceMode.SCAN_ALL) 
//, stopKey = "99460693") -    .read -    .write(Tsv(output.format("scan_all"))) - -  val hbs4 = new HBaseSource( -    "table_name", -    "quorum_name:2181", -    'key, -    Array("column_family"), -    Array('column_name), -    sourceMode = SourceMode.SCAN_RANGE, stopKey = "5003914") -    .read -    .write(Tsv(output.format("scan_range_to_end"))) +	$ mvn clean install -U +	 +	Requires Maven 3.x.x -  val hbs5 = new HBaseSource( -    "table_name", -    "quorum_name:2181", -    'key, -    Array("column_family"), -    Array('column_name), -    sourceMode = SourceMode.SCAN_RANGE, startKey = "5003914") -    .read -    .write(Tsv(output.format("scan_range_from_start"))) -  val hbs6 = new HBaseSource( -    "table_name", -    "quorum_name:2181", -    'key, -    Array("column_family"), -    Array('column_name), -    sourceMode = SourceMode.SCAN_RANGE, startKey = "5003914", stopKey = "5004897") -    .read -    .write(Tsv(output.format("scan_range_between"))) +Example +======= -}  +	package parallelai.spyglass.hbase.example +	 +	import org.apache.hadoop.conf.Configuration +	import org.apache.hadoop.hbase.HBaseConfiguration +	import org.apache.hadoop.hbase.client.HConnectionManager +	import org.apache.hadoop.hbase.client.HTable +	import org.apache.hadoop.hbase.util.Bytes +	import org.apache.log4j.Level +	import org.apache.log4j.Logger +	 +	import com.twitter.scalding._ +	import com.twitter.scalding.Args +	 +	import parallelai.spyglass.base.JobBase +	import parallelai.spyglass.hbase.HBaseSource +	import parallelai.spyglass.hbase.HBaseConstants.SourceMode +	 +	class HBaseExample(args: Args) extends JobBase(args) { +	 +	  val isDebug: Boolean = args("debug").toBoolean +	 +	  if (isDebug) Logger.getRootLogger().setLevel(Level.DEBUG) +	 +	  val output = args("output") +	 +	  println(output) +	 +	  val jobConf = getJobConf +	 +	  val quorumNames = "cldmgr.prod.bigdata.bskyb.com:2181" +	 +	  case class HBaseTableStore( +	      conf: Configuration, +	      quorum: String, +	      
tableName: String) { +	 +	    val tableBytes = Bytes.toBytes(tableName) +	    val connection = HConnectionManager.getConnection(conf) +	    val maxThreads = conf.getInt("hbase.htable.threads.max", 1) +	 +	    conf.set("hbase.zookeeper.quorum", quorumNames); +	 +	    val htable = new HTable(HBaseConfiguration.create(conf), tableName) +	 +	  } +	 +	  val hTableStore = HBaseTableStore(getJobConf, quorumNames, "skybet.test.tbet") +	 +	  val hbs2 = new HBaseSource( +	    "table_name", +	    "quorum_name:2181", +	    'key, +	    Array("column_family"), +	    Array('column_name), +	    sourceMode = SourceMode.GET_LIST, keyList = List("5003914", "5000687", "5004897")) +	    .read +	    .write(Tsv(output.format("get_list"))) +	 +	  val hbs3 = new HBaseSource( +	    "table_name", +	    "quorum_name:2181", +	    'key, +	    Array("column_family"), +	    Array('column_name), +	    sourceMode = SourceMode.SCAN_ALL) //, stopKey = "99460693") +	    .read +	    .write(Tsv(output.format("scan_all"))) +	 +	  val hbs4 = new HBaseSource( +	    "table_name", +	    "quorum_name:2181", +	    'key, +	    Array("column_family"), +	    Array('column_name), +	    sourceMode = SourceMode.SCAN_RANGE, stopKey = "5003914") +	    .read +	    .write(Tsv(output.format("scan_range_to_end"))) +	 +	  val hbs5 = new HBaseSource( +	    "table_name", +	    "quorum_name:2181", +	    'key, +	    Array("column_family"), +	    Array('column_name), +	    sourceMode = SourceMode.SCAN_RANGE, startKey = "5003914") +	    .read +	    .write(Tsv(output.format("scan_range_from_start"))) +	 +	  val hbs6 = new HBaseSource( +	    "table_name", +	    "quorum_name:2181", +	    'key, +	    Array("column_family"), +	    Array('column_name), +	    sourceMode = SourceMode.SCAN_RANGE, startKey = "5003914", stopKey = "5004897") +	    .read +	    .write(Tsv(output.format("scan_range_between"))) +	 +	}   | 
