From de38789a032b586b0e278a81dedb5c8fb6d43e02 Mon Sep 17 00:00:00 2001 From: Koert Kuipers Date: Tue, 6 Aug 2013 13:40:57 -0400 Subject: do not store nulls in hbase. see issue 5 --- .../spyglass/hbase/HBaseConversions.scala | 28 ++++++++++------------ 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'src/main/scala/parallelai/spyglass') diff --git a/src/main/scala/parallelai/spyglass/hbase/HBaseConversions.scala b/src/main/scala/parallelai/spyglass/hbase/HBaseConversions.scala index b6d5742..31ed3ea 100644 --- a/src/main/scala/parallelai/spyglass/hbase/HBaseConversions.scala +++ b/src/main/scala/parallelai/spyglass/hbase/HBaseConversions.scala @@ -10,15 +10,14 @@ import org.apache.hadoop.hbase.util.Bytes import cascading.tuple.TupleEntry class HBasePipeWrapper (pipe: Pipe) { - def toBytesWritable(f: Fields): Pipe = { + def toBytesWritable(f: Fields): Pipe = { asList(f) - .foldLeft(pipe){ (p, f) => { - p.map(f.toString -> f.toString){ from: String => { - new ImmutableBytesWritable(Bytes.toBytes( - if (from == null) "" else from)) - }} - }} - } + .foldLeft(pipe){ (p, f) => { + p.map(f.toString -> f.toString){ from: String => + Option(from).map(x => new ImmutableBytesWritable(Bytes.toBytes(x))).getOrElse(null) + }} + } + } // def toBytesWritable : Pipe = { // asList(Fields.ALL.asInstanceOf[TupleEntry].getFields()).foldLeft(pipe){ (p, f) => { @@ -30,13 +29,12 @@ class HBasePipeWrapper (pipe: Pipe) { def fromBytesWritable(f: Fields): Pipe = { asList(f) - .foldLeft(pipe) { (p, fld) => - p.map(fld.toString -> fld.toString) { from: ImmutableBytesWritable => { - Bytes.toString(from.get) - } - } - } - } + .foldLeft(pipe) { (p, fld) => { + p.map(fld.toString -> fld.toString) { from: ImmutableBytesWritable => + Option(from).map(x => Bytes.toString(x.get)).getOrElse(null) + } + }} + } // def fromBytesWritable : Pipe = { // asList(Fields.ALL.asInstanceOf[TupleEntry].getFields()).foldLeft(pipe) { (p, fld) => -- cgit v1.2.3