about | summary | refs | log | tree | commit | diff | stats
path: root/scald-mvp/src
diff options
context:
space:
mode:
Diffstat (limited to 'scald-mvp/src')
-rw-r--r-- scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala 4
-rw-r--r-- scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala 18
2 files changed, 15 insertions, 7 deletions
diff --git a/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala
index 162f729..5df6b2e 100644
--- a/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala
+++ b/scald-mvp/src/main/scala/sandcrawler/HBaseRowCountJob.scala
@@ -17,9 +17,13 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio
val output = args("output")
val hbs = new HBaseSource(
+ //"table_name",
+ //"quorum_name:2181",
"wbgrp-journal-extract-0-qa", // HBase Table Name
"mtrcs-zk1.us.archive.org:2181", // HBase Zookeeper server (to get runtime config info; can be array?)
new Fields("key"),
+ List("file"),
+ List(new Fields("size", "mimetype")),
sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU"))
.read
.debug
diff --git a/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala b/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala
index c46b0fc..598f45d 100644
--- a/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala
+++ b/scald-mvp/src/test/scala/sandcrawler/HBaseRowCountTest.scala
@@ -24,8 +24,8 @@ class HBaseRowCountTest extends FunSpec with TupleConversions {
val log = LoggerFactory.getLogger(this.getClass.getName)
val sampleData = List(
- List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q"),
- List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")
+ List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "a", "b"),
+ List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "a", "b")
)
JobTest("sandcrawler.HBaseRowCountJob")
@@ -35,18 +35,22 @@ class HBaseRowCountTest extends FunSpec with TupleConversions {
.arg("debug", "true")
.source[Tuple](
new HBaseSource(
- "table_name",
- "quorum_name:2181",
+ //"table_name",
+ //"quorum_name:2181",
+ "wbgrp-journal-extract-0-qa",
+ "mtrcs-zk1.us.archive.org:2181",
new Fields("key"),
- sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "3")),
- sampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(Bytes.toBytes(s))}):_*)))
+ List("file"),
+ List(new Fields("size", "mimetype")),
+ sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")),
+ sampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(Bytes.toBytes(s))}):_*)))
.sink[Tuple](Tsv(output format "get_list")) {
outputBuffer =>
log.debug("Output => " + outputBuffer)
it("should return the test data provided.") {
println("outputBuffer.size => " + outputBuffer.size)
- assert(outputBuffer.size === 3)
+ assert(outputBuffer.size === 2)
}
}
.run