From a58a3b6aa05699b7621d856e8c6f35c25a9bc940 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 29 May 2018 11:18:19 -0700 Subject: switch HBaseRowCountJob to SCAN_ALL --- scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 2 +- .../src/test/scala/sandcrawler/HBaseRowCountTest.scala | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'scalding/src') diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala index 98da239..d47fe60 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala @@ -24,7 +24,7 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio new Fields("key"), List("file"), List(new Fields("size", "mimetype")), - sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")) + sourceMode = SourceMode.SCAN_ALL) .read .debug .groupAll { _.size('count) } diff --git a/scalding/src/test/scala/sandcrawler/HBaseRowCountTest.scala b/scalding/src/test/scala/sandcrawler/HBaseRowCountTest.scala index ac7cf18..94b3740 100644 --- a/scalding/src/test/scala/sandcrawler/HBaseRowCountTest.scala +++ b/scalding/src/test/scala/sandcrawler/HBaseRowCountTest.scala @@ -25,7 +25,13 @@ class HBaseRowCountTest extends FunSpec with TupleConversions { val sampleData = List( List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "a", "b"), - List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "a", "b") + List("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", "a", "b"), + List("sha1:SDKUVHC3YNNEGH5WAG5ZAAXWAEBNX4WT", "a", "b"), + List("sha1:35985C3YNNEGH5WAG5ZAAXWAEBNXJW56", "a", "b"), + List("sha1:885C3YNNEGH5WAG5ZAAXWA8BNXJWT6CZ", "a", "b"), + List("sha1:00904C3YNNEGH5WAG5ZA9XWAEBNXJWT6", "a", "b"), + List("sha1:249C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ", "a", "b"), + List("sha1:095893C3YNNEGH5WAG5ZAAXWAEBNXJWT", "a", "b") ) JobTest("sandcrawler.HBaseRowCountJob") @@ -42,9 +48,9 @@ class HBaseRowCountTest extends FunSpec with TupleConversions { new Fields("key"), List("file"), List(new Fields("size", "mimetype")), - sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU")), + sourceMode = SourceMode.SCAN_ALL), sampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(Bytes.toBytes(s))}):_*))) - .sink[Tuple](Tsv(output format "get_list")) { + .sink[Tuple](Tsv(output)) { outputBuffer => it("should return the test data provided.") { @@ -54,7 +60,7 @@ class HBaseRowCountTest extends FunSpec with TupleConversions { it("should return the correct count") { println("raw output => " + outputBuffer) - assert(outputBuffer(0).getObject(0) === 2) + assert(outputBuffer(0).getObject(0) === 8) } } .run -- cgit v1.2.3