about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-08-20 18:41:51 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2018-08-21 21:25:56 -0700
commit: 39bf4b57cd552e8042bfa25565b390cb2a456ab0 (patch)
tree: ec5ecb68a49dbc69727747f766a02ba91bd3c3b6 /scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala
parent: 6aeafb083d73be8cf3296707c3e558d825202bce (diff)
download: sandcrawler-39bf4b57cd552e8042bfa25565b390cb2a456ab0.tar.gz
download: sandcrawler-39bf4b57cd552e8042bfa25565b390cb2a456ab0.zip
distinction between status_code and status counting
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala
index fd0b4e2..1635e03 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala
@@ -18,15 +18,15 @@ class HBaseStatusCountJob(args: Args) extends JobBase(args) with HBasePipeConver
val source = HBaseCountJob.getHBaseSource(
args("hbase-table"),
args("zookeeper-hosts"),
- "grobid0:status_code")
+ "grobid0:status")
val statusPipe : TypedPipe[Long] = source
.read
- .toTypedPipe[(ImmutableBytesWritable,ImmutableBytesWritable)]('key, 'status_code)
- .map { case (key, raw_code) => Bytes.toLong(raw_code.copyBytes()) }
+ .toTypedPipe[(ImmutableBytesWritable,ImmutableBytesWritable)]('key, 'status)
+ .map { case (key, raw_status) => Bytes.toString(raw_code.copyBytes()) }
statusPipe.groupBy { identity }
.size
.debug
- .write(TypedTsv[(Long,Long)](args("output")))
+ .write(TypedTsv[(Long,String)](args("output")))
}