diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-05-29 11:18:19 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-05-29 11:18:19 -0700 |
commit | a58a3b6aa05699b7621d856e8c6f35c25a9bc940 (patch) | |
tree | 53f5805b205953915139d491892a8990f8bd2a47 /scalding/src/main | |
parent | c14676635f39dd1bc0345e4df2d1fa06c298bfd7 (diff) | |
download | sandcrawler-a58a3b6aa05699b7621d856e8c6f35c25a9bc940.tar.gz sandcrawler-a58a3b6aa05699b7621d856e8c6f35c25a9bc940.zip |
switch HBaseRowCountJob to SCAN_ALL
Diffstat (limited to 'scalding/src/main')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
index 98da239..d47fe60 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
@@ -24,7 +24,7 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio
 new Fields("key"),
 List("file"),
 List(new Fields("size", "mimetype")),
- sourceMode = SourceMode.GET_LIST, keyList = List("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", "sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU"))
+ sourceMode = SourceMode.SCAN_ALL)
 .read
 .debug
 .groupAll { _.size('count) }