diff options
Diffstat (limited to 'scald-mvp/src')
-rw-r--r-- | scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 4 | ||||
-rw-r--r-- | scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala | 18 |
2 files changed, 15 insertions, 7 deletions
diff --git a/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala index 162f729..5df6b2e 100644 --- a/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala +++ b/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala @@ -17,9 +17,13 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio val output = args("output") val hbs = new HBaseSource( + //"table_name", + //"quorum_name:2181", "wbgrp-journal-extract-0-qa", // HBase Table Name "mtrcs-zk1.us.archive.org:2181", // HBase Zookeeper server (to get runtime config info; can be array?) new Fields("key"), + List("file"), + List(new Fields("size", "mimetype")), sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")) .read .debug diff --git a/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala b/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala index c46b0fc..598f45d 100644 --- a/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala +++ b/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala @@ -24,8 +24,8 @@ class HBaseRowCountTest extends FunSpec with TupleConversions { val log = LoggerFactory.getLogger(this.getClass.getName) val sampleData = List( - List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q"), - List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU") + List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "a", "b"), + List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "a", "b") ) JobTest("sandcrawler.HBaseRowCountJob") @@ -35,18 +35,22 @@ class HBaseRowCountTest extends FunSpec with TupleConversions { .arg("debug", "true") .source[Tuple]( new HBaseSource( - "table_name", - "quorum_name:2181", + //"table_name", + //"quorum_name:2181", + "wbgrp-journal-extract-0-qa", + "mtrcs-zk1.us.archive.org:2181", new Fields("key"), - sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "3")), - sampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(Bytes.toBytes(s))}):_*))) + List("file"), + List(new Fields("size", "mimetype")), + sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")), + sampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(Bytes.toBytes(s))}):_*))) .sink[Tuple](Tsv(output format "get_list")) { outputBuffer => log.debug("Output => " + outputBuffer) it("should return the test data provided.") { println("outputBuffer.size => " + outputBuffer.size) - assert(outputBuffer.size === 3) + assert(outputBuffer.size === 2) } } .run |