From c14676635f39dd1bc0345e4df2d1fa06c298bfd7 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 29 May 2018 11:01:14 -0700
Subject: HBaseRowCountJob actually counts rows

---
 scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala')

diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
index 5df6b2e..98da239 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
@@ -27,15 +27,6 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio
     sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU"))
     .read
     .debug
-    .fromBytesWritable(new Fields("key"))
-    .write(Tsv(output format "get_list"))
-
-    /*
-    List("column_family"),
-    sourceMode = SourceMode.SCAN_ALL)
-    .read
-    .debug
-    .fromBytesWritable(new Fields("key"))
-    .write(Tsv(output format "get_list"))
-    */
+    .groupAll { _.size('count) }
+    .write(Tsv(output))
 }
-- 
cgit v1.2.3