From a58a3b6aa05699b7621d856e8c6f35c25a9bc940 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 29 May 2018 11:18:19 -0700
Subject: switch HBaseRowCountJob to SCAN_ALL

---
 scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'scalding/src/main/scala')

diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
index 98da239..d47fe60 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
@@ -24,7 +24,7 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio
  new Fields("key"),
  List("file"),
  List(new Fields("size", "mimetype")),
- sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU"))
+ sourceMode = SourceMode.SCAN_ALL)
  .read
  .debug
  .groupAll { _.size('count) }
-- 
cgit v1.2.3