-rw-r--r-- | pom.xml | 57 |
-rw-r--r-- | src/main/java/parallelai/spyglass/hbase/HBaseRawScheme.java | 572 |
-rw-r--r-- | src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java | 622 |
-rw-r--r-- | src/main/java/parallelai/spyglass/jdbc/db/DBOutputFormat.java | 734 |
-rw-r--r-- | src/main/scala/parallelai/spyglass/hbase/HBaseRawSource.scala | 166 |
5 files changed, 1097 insertions, 1054 deletions
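The changes below re-enable the previously commented-out HBaseRawScheme, HBaseRawTap and HBaseRawSource classes and move the build to CDH 4.3.0, Scala 2.10.2 and Cascading 2.1.6. As a rough orientation before the diff itself, here is a minimal Scalding read sketch against the re-enabled HBaseRawSource; the table name, quorum string, column family, qualifier and output argument are placeholders for illustration, not values taken from this commit.

import com.twitter.scalding._
import org.apache.hadoop.hbase.client.{Result, Scan}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import parallelai.spyglass.hbase.HBaseRawSource

// Minimal sketch: read a table through HBaseRawSource and decode one cell per row.
// "my_table", "zk1.example.com", "cf" and "col1" are hypothetical placeholders.
class RawReadExample(args: Args) extends Job(args) {

  // Optionally narrow the read with a Scan, serialized to base64 for the tap.
  val scan = new Scan()
  scan.setCaching(100)
  val base64Scan = HBaseRawSource.convertScanToString(scan)

  new HBaseRawSource("my_table", "zk1.example.com", Array("cf"), base64Scan = base64Scan)
    .read
    // The scheme emits two fields: 'rowkey (ImmutableBytesWritable) and 'row (the raw Result).
    .mapTo(('rowkey, 'row) -> ('key, 'value)) { in: (ImmutableBytesWritable, Result) =>
      val (rowkey, result) = in
      val cell = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col1"))
      (Bytes.toString(rowkey.get), Option(cell).map(b => Bytes.toString(b)).orNull)
    }
    .write(Tsv(args("output")))
}

Because the scheme hands the raw Result to the job instead of coercing columns into fields, the decoding above is entirely up to the caller; that is the point of the "raw" source compared to the regular HBaseSource.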
@@ -8,13 +8,6 @@ <url>http://www.parallelai.com</url> </organization> - - <name>Cascading and Scalding wrapper for HBase with advanced features</name> - <groupId>parallelai</groupId> - <artifactId>parallelai.spyglass</artifactId> - <version>2.9.3_3.0.0</version> - <packaging>jar</packaging> - <properties> <!-- Java compilation level --> <maven.compiler.source>1.6</maven.compiler.source> @@ -28,32 +21,32 @@ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> - <datafu.version>0.0.4-cdh4.2.0</datafu.version> - <flume.version>1.3.0-cdh4.2.0</flume.version> - <hadoop.version>2.0.0-cdh4.2.0</hadoop.version> - <hbase.version>0.94.2-cdh4.2.0</hbase.version> - <hive.version>0.10.0-cdh4.2.0</hive.version> - <mahout.version>0.7-cdh4.2.0</mahout.version> - <mapreduce.version>2.0.0-mr1-cdh4.2.0</mapreduce.version> - <oozie.version>3.3.0-cdh4.2.0</oozie.version> - <oozie-hadoop.version>2.0.0-cdh4.2.0.oozie-3.3.0-cdh4.2.0</oozie-hadoop.version> - <oozie-sharelib.version>3.3.0-cdh4.2.0</oozie-sharelib.version> - <pig.version>0.10.0-cdh4.2.0</pig.version> - <sqoop.version>1.4.2-cdh4.2.0</sqoop.version> - <whirr.version>0.8.0-cdh4.2.0</whirr.version> - <zookeeper.version>3.4.5-cdh4.2.0</zookeeper.version> + <datafu.version>0.0.4-cdh4.3.0</datafu.version> + <flume.version>1.3.0-cdh4.3.0</flume.version> + <hadoop.version>2.0.0-cdh4.3.0</hadoop.version> + <hbase.version>0.94.6-cdh4.3.0</hbase.version> + <hive.version>0.10.0-cdh4.3.0</hive.version> + <mahout.version>0.7-cdh4.3.0</mahout.version> + <mapreduce.version>2.0.0-mr1-cdh4.3.0</mapreduce.version> + <oozie.version>3.3.2-cdh4.3.0</oozie.version> + <oozie-hadoop.version>2.0.0-cdh4.2.0.oozie-3.3.2-cdh4.3.0</oozie-hadoop.version> + <oozie-sharelib.version>3.3.2-cdh4.3.0</oozie-sharelib.version> + <pig.version>0.11.0-cdh4.3.0</pig.version> + <sqoop.version>1.4.3-cdh4.3.0</sqoop.version> + <whirr.version>0.8.2-cdh4.3.0</whirr.version> + <zookeeper.version>3.4.5-cdh4.3.0</zookeeper.version> <!-- Scala/Scalding/Cascading properties --> - <scala.version>2.9.3</scala.version> - <scalding.scala.version>2.9.3</scalding.scala.version> + <scala.version>2.10.2</scala.version> + <scalding.scala.version>2.10</scalding.scala.version> <scalding.version>0.8.6</scalding.version> - <cascading.version>2.1.0</cascading.version> + <cascading.version>2.1.6</cascading.version> <scalding-commons.version>0.2.0</scalding-commons.version> <scalatest.version>1.9.1</scalatest.version> <trove4j.version>3.0.3</trove4j.version> <maple.version>0.2.8</maple.version> - <specs2.version>1.12.4.1</specs2.version> + <specs2.version>2.1</specs2.version> <typesafe.config.version>1.0.0</typesafe.config.version> <!-- Other libraries properties --> @@ -66,7 +59,13 @@ </properties> - <distributionManagement> + <name>Cascading and Scalding wrapper for HBase with advanced features</name> + <groupId>parallelai</groupId> + <artifactId>parallelai.spyglass</artifactId> + <version>${scala.version}_2.4.0</version> + <packaging>jar</packaging> + + <distributionManagement> <repository> <id>repo</id> <url>https://github.com/ParallelAI/mvn-repo/raw/master/releases</url> @@ -90,6 +89,10 @@ <name>Con Jars</name> <url>http://conjars.org/repo</url> </repository> + <repository> + <id>mvnrepository</id> + <url>http://repo1.maven.org/maven2</url> + </repository> </repositories> <!-- Profiles --> @@ -237,7 +240,7 @@ </includes> </configuration> </plugin> - <!-- This plugin is not supported by Eclipse, so maybe we shouldn't be + <!-- 
This plugin is not supported by Eclipse, so maybe we shouldn't be using it --> <plugin> <groupId>org.scala-tools</groupId> diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRawScheme.java b/src/main/java/parallelai/spyglass/hbase/HBaseRawScheme.java index 7dba40d..7b62c88 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseRawScheme.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseRawScheme.java @@ -1,286 +1,286 @@ -///* -//* Copyright (c) 2009 Concurrent, Inc. -//* -//* This work has been released into the public domain -//* by the copyright holder. This applies worldwide. -//* -//* In case this is not legally possible: -//* The copyright holder grants any entity the right -//* to use this work for any purpose, without any -//* conditions, unless such conditions are required by law. -//*/ -// -//package parallelai.spyglass.hbase; -// -//import java.io.IOException; -//import java.util.Arrays; -//import java.util.HashSet; -// -//import org.apache.hadoop.hbase.client.Put; -//import org.apache.hadoop.hbase.client.Result; -//import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -//import org.apache.hadoop.hbase.mapred.TableOutputFormat; -//import org.apache.hadoop.hbase.util.Bytes; -//import org.apache.hadoop.mapred.JobConf; -//import org.apache.hadoop.mapred.OutputCollector; -//import org.apache.hadoop.mapred.RecordReader; -//import org.slf4j.Logger; -//import org.slf4j.LoggerFactory; -// -//import com.twitter.elephantbird.mapred.input.DeprecatedInputFormatValueCopier; -//import com.twitter.elephantbird.mapred.input.DeprecatedInputFormatWrapper; -// -//import cascading.flow.FlowProcess; -//import cascading.scheme.Scheme; -//import cascading.scheme.SinkCall; -//import cascading.scheme.SourceCall; -//import cascading.tap.Tap; -//import cascading.tuple.Fields; -//import cascading.tuple.Tuple; -//import cascading.tuple.TupleEntry; -//import cascading.util.Util; -// -///** -//* The HBaseRawScheme class is a {@link Scheme} subclass. It is used in conjunction -//* with the {@HBaseRawTap} to allow for the reading and writing of data -//* to and from a HBase cluster. -//* -//* @see HBaseRawTap -//*/ -//@SuppressWarnings({ "rawtypes", "deprecation" }) -//public class HBaseRawScheme extends Scheme<JobConf, RecordReader, OutputCollector, Object[], Object[]> { -// /** -// * -// */ -// private static final long serialVersionUID = 6248976486883281356L; -// -// /** Field LOG */ -// private static final Logger LOG = LoggerFactory.getLogger(HBaseRawScheme.class); -// -// public final Fields RowKeyField = new Fields("rowkey"); -// public final Fields RowField = new Fields("row"); -// -// /** String familyNames */ -// private String[] familyNames; -// -// private boolean writeNulls = true; -// -// /** -// * Constructor HBaseScheme creates a new HBaseScheme instance. 
-// * -// * @param keyFields -// * of type Fields -// * @param familyName -// * of type String -// * @param valueFields -// * of type Fields -// */ -// public HBaseRawScheme(String familyName) { -// this(new String[] { familyName }); -// } -// -// public HBaseRawScheme(String[] familyNames) { -// this.familyNames = familyNames; -// setSourceFields(); -// } -// -// public HBaseRawScheme(String familyName, boolean writeNulls) { -// this(new String[] { familyName }, writeNulls); -// } -// -// public HBaseRawScheme(String[] familyNames, boolean writeNulls) { -// this.familyNames = familyNames; -// this.writeNulls = writeNulls; -// setSourceFields(); -// } -// -// private void setSourceFields() { -// Fields sourceFields = Fields.join(RowKeyField, RowField); -// setSourceFields(sourceFields); -// } -// -// /** -// * Method getFamilyNames returns the set of familyNames of this HBaseScheme -// * object. -// * -// * @return the familyNames (type String[]) of this HBaseScheme object. -// */ -// public String[] getFamilyNames() { -// HashSet<String> familyNameSet = new HashSet<String>(); -// if (familyNames != null) { -// for (String familyName : familyNames) { -// familyNameSet.add(familyName); -// } -// } -// return familyNameSet.toArray(new String[0]); -// } -// -// @Override -// public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) { -// Object[] pair = new Object[] { sourceCall.getInput().createKey(), sourceCall.getInput().createValue() }; -// -// sourceCall.setContext(pair); -// } -// -// @Override -// public void sourceCleanup(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) { -// sourceCall.setContext(null); -// } -// -// @SuppressWarnings("unchecked") -// @Override -// public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) -// throws IOException { -// Tuple result = new Tuple(); -// -// Object key = sourceCall.getContext()[0]; -// Object value = sourceCall.getContext()[1]; -// boolean hasNext = sourceCall.getInput().next(key, value); -// if (!hasNext) { -// return false; -// } -// -// // Skip nulls -// if (key == null || value == null) { -// return true; -// } -// -// ImmutableBytesWritable keyWritable = (ImmutableBytesWritable) key; -// Result row = (Result) value; -// result.add(keyWritable); -// result.add(row); -// sourceCall.getIncomingEntry().setTuple(result); -// return true; -// } -// -// @SuppressWarnings("unchecked") -// @Override -// public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { -// TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); -// OutputCollector outputCollector = sinkCall.getOutput(); -// Tuple key = tupleEntry.selectTuple(RowKeyField); -// Object okey = key.getObject(0); -// ImmutableBytesWritable keyBytes = getBytes(okey); -// Put put = new Put(keyBytes.get()); -// Fields outFields = tupleEntry.getFields().subtract(RowKeyField); -// if (null != outFields) { -// TupleEntry values = tupleEntry.selectEntry(outFields); -// for (int n = 0; n < values.getFields().size(); n++) { -// Object o = values.get(n); -// ImmutableBytesWritable valueBytes = getBytes(o); -// Comparable field = outFields.get(n); -// ColumnName cn = parseColumn((String) field); -// if (null == cn.family) { -// if (n >= familyNames.length) -// cn.family = familyNames[familyNames.length - 1]; -// else -// cn.family = familyNames[n]; -// } -// if (null != o || writeNulls) -// 
put.add(Bytes.toBytes(cn.family), Bytes.toBytes(cn.name), valueBytes.get()); -// } -// } -// outputCollector.collect(null, put); -// } -// -// private ImmutableBytesWritable getBytes(Object obj) { -// if (null == obj) -// return new ImmutableBytesWritable(new byte[0]); -// if (obj instanceof ImmutableBytesWritable) -// return (ImmutableBytesWritable) obj; -// else if (obj instanceof String) -// return new ImmutableBytesWritable(Bytes.toBytes((String) obj)); -// else if (obj instanceof Long) -// return new ImmutableBytesWritable(Bytes.toBytes((Long) obj)); -// else if (obj instanceof Integer) -// return new ImmutableBytesWritable(Bytes.toBytes((Integer) obj)); -// else if (obj instanceof Short) -// return new ImmutableBytesWritable(Bytes.toBytes((Short) obj)); -// else if (obj instanceof Boolean) -// return new ImmutableBytesWritable(Bytes.toBytes((Boolean) obj)); -// else if (obj instanceof Double) -// return new ImmutableBytesWritable(Bytes.toBytes((Double) obj)); -// else -// throw new IllegalArgumentException("cannot convert object to ImmutableBytesWritable, class=" -// + obj.getClass().getName()); -// } -// -// private ColumnName parseColumn(String column) { -// ColumnName ret = new ColumnName(); -// int pos = column.indexOf(":"); -// if (pos > 0) { -// ret.name = column.substring(pos + 1); -// ret.family = column.substring(0, pos); -// } else { -// ret.name = column; -// } -// return ret; -// } -// -// private class ColumnName { -// String family; -// String name; -// -// ColumnName() { -// } -// } -// -// @Override -// public void sinkConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { -// conf.setOutputFormat(TableOutputFormat.class); -// conf.setOutputKeyClass(ImmutableBytesWritable.class); -// conf.setOutputValueClass(Put.class); -// } -// -// @Override -// public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, -// JobConf conf) { -// DeprecatedInputFormatWrapper.setInputFormat(org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf, -// ValueCopier.class); -// if (null != familyNames) { -// String columns = Util.join(this.familyNames, " "); -// LOG.debug("sourcing from column families: {}", columns); -// conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, columns); -// } -// } -// -// @Override -// public boolean equals(Object object) { -// if (this == object) { -// return true; -// } -// if (object == null || getClass() != object.getClass()) { -// return false; -// } -// if (!super.equals(object)) { -// return false; -// } -// -// HBaseRawScheme that = (HBaseRawScheme) object; -// -// if (!Arrays.equals(familyNames, that.familyNames)) { -// return false; -// } -// return true; -// } -// -// @Override -// public int hashCode() { -// int result = super.hashCode(); -// result = 31 * result + (familyNames != null ? Arrays.hashCode(familyNames) : 0); -// return result; -// } -// -// public static class ValueCopier implements DeprecatedInputFormatValueCopier<Result> { -// -// public ValueCopier() { -// } -// -// public void copyValue(Result oldValue, Result newValue) { -// if (null != oldValue && null != newValue) { -// oldValue.copyFrom(newValue); -// } -// } -// -// } -//} +/* +* Copyright (c) 2009 Concurrent, Inc. +* +* This work has been released into the public domain +* by the copyright holder. This applies worldwide. 
+* +* In case this is not legally possible: +* The copyright holder grants any entity the right +* to use this work for any purpose, without any +* conditions, unless such conditions are required by law. +*/ + +package parallelai.spyglass.hbase; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; + +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.RecordReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.twitter.elephantbird.mapred.input.DeprecatedInputFormatValueCopier; +import com.twitter.elephantbird.mapred.input.DeprecatedInputFormatWrapper; + +import cascading.flow.FlowProcess; +import cascading.scheme.Scheme; +import cascading.scheme.SinkCall; +import cascading.scheme.SourceCall; +import cascading.tap.Tap; +import cascading.tuple.Fields; +import cascading.tuple.Tuple; +import cascading.tuple.TupleEntry; +import cascading.util.Util; + +/** +* The HBaseRawScheme class is a {@link Scheme} subclass. It is used in conjunction +* with the {@HBaseRawTap} to allow for the reading and writing of data +* to and from a HBase cluster. +* +* @see HBaseRawTap +*/ +@SuppressWarnings({ "rawtypes", "deprecation" }) +public class HBaseRawScheme extends Scheme<JobConf, RecordReader, OutputCollector, Object[], Object[]> { + /** + * + */ + private static final long serialVersionUID = 6248976486883281356L; + + /** Field LOG */ + private static final Logger LOG = LoggerFactory.getLogger(HBaseRawScheme.class); + + public final Fields RowKeyField = new Fields("rowkey"); + public final Fields RowField = new Fields("row"); + + /** String familyNames */ + private String[] familyNames; + + private boolean writeNulls = true; + + /** + * Constructor HBaseScheme creates a new HBaseScheme instance. + * + * @param keyFields + * of type Fields + * @param familyName + * of type String + * @param valueFields + * of type Fields + */ + public HBaseRawScheme(String familyName) { + this(new String[] { familyName }); + } + + public HBaseRawScheme(String[] familyNames) { + this.familyNames = familyNames; + setSourceFields(); + } + + public HBaseRawScheme(String familyName, boolean writeNulls) { + this(new String[] { familyName }, writeNulls); + } + + public HBaseRawScheme(String[] familyNames, boolean writeNulls) { + this.familyNames = familyNames; + this.writeNulls = writeNulls; + setSourceFields(); + } + + private void setSourceFields() { + Fields sourceFields = Fields.join(RowKeyField, RowField); + setSourceFields(sourceFields); + } + + /** + * Method getFamilyNames returns the set of familyNames of this HBaseScheme + * object. + * + * @return the familyNames (type String[]) of this HBaseScheme object. 
+ */ + public String[] getFamilyNames() { + HashSet<String> familyNameSet = new HashSet<String>(); + if (familyNames != null) { + for (String familyName : familyNames) { + familyNameSet.add(familyName); + } + } + return familyNameSet.toArray(new String[0]); + } + + @Override + public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) { + Object[] pair = new Object[] { sourceCall.getInput().createKey(), sourceCall.getInput().createValue() }; + + sourceCall.setContext(pair); + } + + @Override + public void sourceCleanup(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) { + sourceCall.setContext(null); + } + + @SuppressWarnings("unchecked") + @Override + public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) + throws IOException { + Tuple result = new Tuple(); + + Object key = sourceCall.getContext()[0]; + Object value = sourceCall.getContext()[1]; + boolean hasNext = sourceCall.getInput().next(key, value); + if (!hasNext) { + return false; + } + + // Skip nulls + if (key == null || value == null) { + return true; + } + + ImmutableBytesWritable keyWritable = (ImmutableBytesWritable) key; + Result row = (Result) value; + result.add(keyWritable); + result.add(row); + sourceCall.getIncomingEntry().setTuple(result); + return true; + } + + @SuppressWarnings("unchecked") + @Override + public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { + TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); + OutputCollector outputCollector = sinkCall.getOutput(); + Tuple key = tupleEntry.selectTuple(RowKeyField); + Object okey = key.getObject(0); + ImmutableBytesWritable keyBytes = getBytes(okey); + Put put = new Put(keyBytes.get()); + Fields outFields = tupleEntry.getFields().subtract(RowKeyField); + if (null != outFields) { + TupleEntry values = tupleEntry.selectEntry(outFields); + for (int n = 0; n < values.getFields().size(); n++) { + Object o = values.get(n); + ImmutableBytesWritable valueBytes = getBytes(o); + Comparable field = outFields.get(n); + ColumnName cn = parseColumn((String) field); + if (null == cn.family) { + if (n >= familyNames.length) + cn.family = familyNames[familyNames.length - 1]; + else + cn.family = familyNames[n]; + } + if (null != o || writeNulls) + put.add(Bytes.toBytes(cn.family), Bytes.toBytes(cn.name), valueBytes.get()); + } + } + outputCollector.collect(null, put); + } + + private ImmutableBytesWritable getBytes(Object obj) { + if (null == obj) + return new ImmutableBytesWritable(new byte[0]); + if (obj instanceof ImmutableBytesWritable) + return (ImmutableBytesWritable) obj; + else if (obj instanceof String) + return new ImmutableBytesWritable(Bytes.toBytes((String) obj)); + else if (obj instanceof Long) + return new ImmutableBytesWritable(Bytes.toBytes((Long) obj)); + else if (obj instanceof Integer) + return new ImmutableBytesWritable(Bytes.toBytes((Integer) obj)); + else if (obj instanceof Short) + return new ImmutableBytesWritable(Bytes.toBytes((Short) obj)); + else if (obj instanceof Boolean) + return new ImmutableBytesWritable(Bytes.toBytes((Boolean) obj)); + else if (obj instanceof Double) + return new ImmutableBytesWritable(Bytes.toBytes((Double) obj)); + else + throw new IllegalArgumentException("cannot convert object to ImmutableBytesWritable, class=" + + obj.getClass().getName()); + } + + private ColumnName parseColumn(String column) { + ColumnName ret = new ColumnName(); + int 
pos = column.indexOf(":"); + if (pos > 0) { + ret.name = column.substring(pos + 1); + ret.family = column.substring(0, pos); + } else { + ret.name = column; + } + return ret; + } + + private class ColumnName { + String family; + String name; + + ColumnName() { + } + } + + @Override + public void sinkConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { + conf.setOutputFormat(TableOutputFormat.class); + conf.setOutputKeyClass(ImmutableBytesWritable.class); + conf.setOutputValueClass(Put.class); + } + + @Override + public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, + JobConf conf) { + DeprecatedInputFormatWrapper.setInputFormat(org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf, + ValueCopier.class); + if (null != familyNames) { + String columns = Util.join(this.familyNames, " "); + LOG.debug("sourcing from column families: {}", columns); + conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, columns); + } + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + if (!super.equals(object)) { + return false; + } + + HBaseRawScheme that = (HBaseRawScheme) object; + + if (!Arrays.equals(familyNames, that.familyNames)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + (familyNames != null ? Arrays.hashCode(familyNames) : 0); + return result; + } + + public static class ValueCopier implements DeprecatedInputFormatValueCopier<Result> { + + public ValueCopier() { + } + + public void copyValue(Result oldValue, Result newValue) { + if (null != oldValue && null != newValue) { + oldValue.copyFrom(newValue); + } + } + + } +} diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java index 780d3fc..5dcd57d 100644 --- a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java +++ b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java @@ -1,311 +1,311 @@ -///* -//* Copyright (c) 2009 Concurrent, Inc. -//* -//* This work has been released into the public domain -//* by the copyright holder. This applies worldwide. -//* -//* In case this is not legally possible: -//* The copyright holder grants any entity the right -//* to use this work for any purpose, without any -//* conditions, unless such conditions are required by law. 
-//*/ -// -//package parallelai.spyglass.hbase; -// -//import java.io.IOException; -//import java.util.UUID; -// -//import org.apache.hadoop.conf.Configuration; -//import org.apache.hadoop.fs.Path; -//import org.apache.hadoop.hbase.HBaseConfiguration; -//import org.apache.hadoop.hbase.HColumnDescriptor; -//import org.apache.hadoop.hbase.HTableDescriptor; -//import org.apache.hadoop.hbase.MasterNotRunningException; -//import org.apache.hadoop.hbase.ZooKeeperConnectionException; -//import org.apache.hadoop.hbase.client.HBaseAdmin; -//import org.apache.hadoop.hbase.client.Scan; -//import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; -//import org.apache.hadoop.mapred.FileInputFormat; -//import org.apache.hadoop.mapred.JobConf; -//import org.apache.hadoop.mapred.OutputCollector; -//import org.apache.hadoop.mapred.RecordReader; -//import org.slf4j.Logger; -//import org.slf4j.LoggerFactory; -// -//import cascading.flow.FlowProcess; -//import cascading.tap.SinkMode; -//import cascading.tap.Tap; -//import cascading.tap.hadoop.io.HadoopTupleEntrySchemeIterator; -//import cascading.tuple.TupleEntryCollector; -//import cascading.tuple.TupleEntryIterator; -// -//import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -// -///** -//* The HBaseRawTap class is a {@link Tap} subclass. It is used in conjunction with -//* the {@HBaseRawScheme} to allow for the reading and writing -//* of data to and from a HBase cluster. -//*/ -//@SuppressWarnings({ "deprecation", "rawtypes" }) -//public class HBaseRawTap extends Tap<JobConf, RecordReader, OutputCollector> { -// /** -// * -// */ -// private static final long serialVersionUID = 8019189493428493323L; -// -// /** Field LOG */ -// private static final Logger LOG = LoggerFactory.getLogger(HBaseRawTap.class); -// -// private final String id = UUID.randomUUID().toString(); -// -// /** Field SCHEME */ -// public static final String SCHEME = "hbase"; -// -// /** Field hBaseAdmin */ -// private transient HBaseAdmin hBaseAdmin; -// -// /** Field hostName */ -// private String quorumNames; -// /** Field tableName */ -// private String tableName; -// private String base64Scan; -// -// /** -// * Constructor HBaseTap creates a new HBaseTap instance. -// * -// * @param tableName -// * of type String -// * @param HBaseFullScheme -// * of type HBaseFullScheme -// */ -// public HBaseRawTap(String tableName, HBaseRawScheme HBaseFullScheme) { -// super(HBaseFullScheme, SinkMode.UPDATE); -// this.tableName = tableName; -// } -// -// /** -// * Constructor HBaseTap creates a new HBaseTap instance. -// * -// * @param tableName -// * of type String -// * @param HBaseFullScheme -// * of type HBaseFullScheme -// * @param sinkMode -// * of type SinkMode -// */ -// public HBaseRawTap(String tableName, HBaseRawScheme HBaseFullScheme, SinkMode sinkMode) { -// super(HBaseFullScheme, sinkMode); -// this.tableName = tableName; -// } -// -// /** -// * Constructor HBaseTap creates a new HBaseTap instance. -// * -// * @param tableName -// * of type String -// * @param HBaseFullScheme -// * of type HBaseFullScheme -// */ -// public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme) { -// super(HBaseFullScheme, SinkMode.UPDATE); -// this.quorumNames = quorumNames; -// this.tableName = tableName; -// } -// -// /** -// * Constructor HBaseTap creates a new HBaseTap instance. 
-// * -// * @param tableName -// * of type String -// * @param HBaseFullScheme -// * of type HBaseFullScheme -// * @param sinkMode -// * of type SinkMode -// */ -// public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme, SinkMode sinkMode) { -// super(HBaseFullScheme, sinkMode); -// this.quorumNames = quorumNames; -// this.tableName = tableName; -// } -// -// /** -// * Constructor HBaseTap creates a new HBaseTap instance. -// * -// * @param quorumNames HBase quorum -// * @param tableName The name of the HBase table to read -// * @param HBaseFullScheme -// * @param base64Scan An optional base64 encoded scan object -// * @param sinkMode If REPLACE the output table will be deleted before writing to -// */ -// public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme, String base64Scan, SinkMode sinkMode) { -// super(HBaseFullScheme, sinkMode); -// this.quorumNames = quorumNames; -// this.tableName = tableName; -// this.base64Scan = base64Scan; -// } -// -// /** -// * Method getTableName returns the tableName of this HBaseTap object. -// * -// * @return the tableName (type String) of this HBaseTap object. -// */ -// public String getTableName() { -// return tableName; -// } -// -// public Path getPath() { -// return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_")); -// } -// -// protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException { -// if (hBaseAdmin == null) { -// Configuration hbaseConf = HBaseConfiguration.create(conf); -// hBaseAdmin = new HBaseAdmin(hbaseConf); -// } -// -// return hBaseAdmin; -// } -// -// @Override -// public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) { -// if (quorumNames != null) { -// conf.set("hbase.zookeeper.quorum", quorumNames); -// } -// -// LOG.debug("sinking to table: {}", tableName); -// -// if (isReplace() && conf.get("mapred.task.partition") == null) { -// try { -// deleteResource(conf); -// -// } catch (IOException e) { -// throw new RuntimeException("could not delete resource: " + e); -// } -// } -// -// else if (isUpdate() || isReplace()) { -// try { -// createResource(conf); -// } catch (IOException e) { -// throw new RuntimeException(tableName + " does not exist !", e); -// } -// -// } -// -// conf.set(TableOutputFormat.OUTPUT_TABLE, tableName); -// super.sinkConfInit(process, conf); -// } -// -// @Override -// public String getIdentifier() { -// return id; -// } -// -// @Override -// public TupleEntryIterator openForRead(FlowProcess<JobConf> jobConfFlowProcess, RecordReader recordReader) -// throws IOException { -// return new HadoopTupleEntrySchemeIterator(jobConfFlowProcess, this, recordReader); -// } -// -// @Override -// public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector) -// throws IOException { -// HBaseTapCollector hBaseCollector = new HBaseTapCollector(jobConfFlowProcess, this); -// hBaseCollector.prepare(); -// return hBaseCollector; -// } -// -// @Override -// public boolean createResource(JobConf jobConf) throws IOException { -// HBaseAdmin hBaseAdmin = getHBaseAdmin(jobConf); -// -// if (hBaseAdmin.tableExists(tableName)) { -// return true; -// } -// -// LOG.info("creating hbase table: {}", tableName); -// -// HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); -// -// String[] familyNames = ((HBaseRawScheme) getScheme()).getFamilyNames(); -// -// for (String familyName : familyNames) { -// 
tableDescriptor.addFamily(new HColumnDescriptor(familyName)); -// } -// -// hBaseAdmin.createTable(tableDescriptor); -// -// return true; -// } -// -// @Override -// public boolean deleteResource(JobConf jobConf) throws IOException { -// if (getHBaseAdmin(jobConf).tableExists(tableName)) { -// if (getHBaseAdmin(jobConf).isTableEnabled(tableName)) -// getHBaseAdmin(jobConf).disableTable(tableName); -// getHBaseAdmin(jobConf).deleteTable(tableName); -// } -// return true; -// } -// -// @Override -// public boolean resourceExists(JobConf jobConf) throws IOException { -// return getHBaseAdmin(jobConf).tableExists(tableName); -// } -// -// @Override -// public long getModifiedTime(JobConf jobConf) throws IOException { -// return System.currentTimeMillis(); // currently unable to find last mod -// // time -// // on a table -// } -// -// @Override -// public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) { -// // a hack for MultiInputFormat to see that there is a child format -// FileInputFormat.setInputPaths(conf, getPath()); -// -// if (quorumNames != null) { -// conf.set("hbase.zookeeper.quorum", quorumNames); -// } -// -// LOG.debug("sourcing from table: {}", tableName); -// conf.set(TableInputFormat.INPUT_TABLE, tableName); -// if (null != base64Scan) -// conf.set(TableInputFormat.SCAN, base64Scan); -// -// super.sourceConfInit(process, conf); -// } -// -// @Override -// public boolean equals(Object object) { -// if (this == object) { -// return true; -// } -// if (object == null || getClass() != object.getClass()) { -// return false; -// } -// if (!super.equals(object)) { -// return false; -// } -// -// HBaseRawTap hBaseTap = (HBaseRawTap) object; -// -// if (tableName != null ? !tableName.equals(hBaseTap.tableName) : hBaseTap.tableName != null) { -// return false; -// } -// -// if (base64Scan != null ? !base64Scan.equals(hBaseTap.base64Scan) : hBaseTap.base64Scan != null) { -// return false; -// } -// -// return true; -// } -// -// @Override -// public int hashCode() { -// int result = super.hashCode(); -// result = 31 * result + (tableName != null ? tableName.hashCode() : 0) + (base64Scan != null ? base64Scan.hashCode() : 0); -// return result; -// } -//} +/* +* Copyright (c) 2009 Concurrent, Inc. +* +* This work has been released into the public domain +* by the copyright holder. This applies worldwide. +* +* In case this is not legally possible: +* The copyright holder grants any entity the right +* to use this work for any purpose, without any +* conditions, unless such conditions are required by law. 
+*/ + +package parallelai.spyglass.hbase; + +import java.io.IOException; +import java.util.UUID; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.RecordReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import cascading.flow.FlowProcess; +import cascading.tap.SinkMode; +import cascading.tap.Tap; +import cascading.tap.hadoop.io.HadoopTupleEntrySchemeIterator; +import cascading.tuple.TupleEntryCollector; +import cascading.tuple.TupleEntryIterator; + +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; + +/** +* The HBaseRawTap class is a {@link Tap} subclass. It is used in conjunction with +* the {@HBaseRawScheme} to allow for the reading and writing +* of data to and from a HBase cluster. +*/ +@SuppressWarnings({ "deprecation", "rawtypes" }) +public class HBaseRawTap extends Tap<JobConf, RecordReader, OutputCollector> { + /** + * + */ + private static final long serialVersionUID = 8019189493428493323L; + + /** Field LOG */ + private static final Logger LOG = LoggerFactory.getLogger(HBaseRawTap.class); + + private final String id = UUID.randomUUID().toString(); + + /** Field SCHEME */ + public static final String SCHEME = "hbase"; + + /** Field hBaseAdmin */ + private transient HBaseAdmin hBaseAdmin; + + /** Field hostName */ + private String quorumNames; + /** Field tableName */ + private String tableName; + private String base64Scan; + + /** + * Constructor HBaseTap creates a new HBaseTap instance. + * + * @param tableName + * of type String + * @param HBaseFullScheme + * of type HBaseFullScheme + */ + public HBaseRawTap(String tableName, HBaseRawScheme HBaseFullScheme) { + super(HBaseFullScheme, SinkMode.UPDATE); + this.tableName = tableName; + } + + /** + * Constructor HBaseTap creates a new HBaseTap instance. + * + * @param tableName + * of type String + * @param HBaseFullScheme + * of type HBaseFullScheme + * @param sinkMode + * of type SinkMode + */ + public HBaseRawTap(String tableName, HBaseRawScheme HBaseFullScheme, SinkMode sinkMode) { + super(HBaseFullScheme, sinkMode); + this.tableName = tableName; + } + + /** + * Constructor HBaseTap creates a new HBaseTap instance. + * + * @param tableName + * of type String + * @param HBaseFullScheme + * of type HBaseFullScheme + */ + public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme) { + super(HBaseFullScheme, SinkMode.UPDATE); + this.quorumNames = quorumNames; + this.tableName = tableName; + } + + /** + * Constructor HBaseTap creates a new HBaseTap instance. 
+ * + * @param tableName + * of type String + * @param HBaseFullScheme + * of type HBaseFullScheme + * @param sinkMode + * of type SinkMode + */ + public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme, SinkMode sinkMode) { + super(HBaseFullScheme, sinkMode); + this.quorumNames = quorumNames; + this.tableName = tableName; + } + + /** + * Constructor HBaseTap creates a new HBaseTap instance. + * + * @param quorumNames HBase quorum + * @param tableName The name of the HBase table to read + * @param HBaseFullScheme + * @param base64Scan An optional base64 encoded scan object + * @param sinkMode If REPLACE the output table will be deleted before writing to + */ + public HBaseRawTap(String quorumNames, String tableName, HBaseRawScheme HBaseFullScheme, String base64Scan, SinkMode sinkMode) { + super(HBaseFullScheme, sinkMode); + this.quorumNames = quorumNames; + this.tableName = tableName; + this.base64Scan = base64Scan; + } + + /** + * Method getTableName returns the tableName of this HBaseTap object. + * + * @return the tableName (type String) of this HBaseTap object. + */ + public String getTableName() { + return tableName; + } + + public Path getPath() { + return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_")); + } + + protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException { + if (hBaseAdmin == null) { + Configuration hbaseConf = HBaseConfiguration.create(conf); + hBaseAdmin = new HBaseAdmin(hbaseConf); + } + + return hBaseAdmin; + } + + @Override + public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) { + if (quorumNames != null) { + conf.set("hbase.zookeeper.quorum", quorumNames); + } + + LOG.debug("sinking to table: {}", tableName); + + if (isReplace() && conf.get("mapred.task.partition") == null) { + try { + deleteResource(conf); + + } catch (IOException e) { + throw new RuntimeException("could not delete resource: " + e); + } + } + + else if (isUpdate() || isReplace()) { + try { + createResource(conf); + } catch (IOException e) { + throw new RuntimeException(tableName + " does not exist !", e); + } + + } + + conf.set(TableOutputFormat.OUTPUT_TABLE, tableName); + super.sinkConfInit(process, conf); + } + + @Override + public String getIdentifier() { + return id; + } + + @Override + public TupleEntryIterator openForRead(FlowProcess<JobConf> jobConfFlowProcess, RecordReader recordReader) + throws IOException { + return new HadoopTupleEntrySchemeIterator(jobConfFlowProcess, this, recordReader); + } + + @Override + public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector) + throws IOException { + HBaseTapCollector hBaseCollector = new HBaseTapCollector(jobConfFlowProcess, this); + hBaseCollector.prepare(); + return hBaseCollector; + } + + @Override + public boolean createResource(JobConf jobConf) throws IOException { + HBaseAdmin hBaseAdmin = getHBaseAdmin(jobConf); + + if (hBaseAdmin.tableExists(tableName)) { + return true; + } + + LOG.info("creating hbase table: {}", tableName); + + HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); + + String[] familyNames = ((HBaseRawScheme) getScheme()).getFamilyNames(); + + for (String familyName : familyNames) { + tableDescriptor.addFamily(new HColumnDescriptor(familyName)); + } + + hBaseAdmin.createTable(tableDescriptor); + + return true; + } + + @Override + public boolean deleteResource(JobConf jobConf) throws IOException { + if 
(getHBaseAdmin(jobConf).tableExists(tableName)) { + if (getHBaseAdmin(jobConf).isTableEnabled(tableName)) + getHBaseAdmin(jobConf).disableTable(tableName); + getHBaseAdmin(jobConf).deleteTable(tableName); + } + return true; + } + + @Override + public boolean resourceExists(JobConf jobConf) throws IOException { + return getHBaseAdmin(jobConf).tableExists(tableName); + } + + @Override + public long getModifiedTime(JobConf jobConf) throws IOException { + return System.currentTimeMillis(); // currently unable to find last mod + // time + // on a table + } + + @Override + public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) { + // a hack for MultiInputFormat to see that there is a child format + FileInputFormat.setInputPaths(conf, getPath()); + + if (quorumNames != null) { + conf.set("hbase.zookeeper.quorum", quorumNames); + } + + LOG.debug("sourcing from table: {}", tableName); + conf.set(TableInputFormat.INPUT_TABLE, tableName); + if (null != base64Scan) + conf.set(TableInputFormat.SCAN, base64Scan); + + super.sourceConfInit(process, conf); + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + if (!super.equals(object)) { + return false; + } + + HBaseRawTap hBaseTap = (HBaseRawTap) object; + + if (tableName != null ? !tableName.equals(hBaseTap.tableName) : hBaseTap.tableName != null) { + return false; + } + + if (base64Scan != null ? !base64Scan.equals(hBaseTap.base64Scan) : hBaseTap.base64Scan != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + (tableName != null ? tableName.hashCode() : 0) + (base64Scan != null ? base64Scan.hashCode() : 0); + return result; + } +} diff --git a/src/main/java/parallelai/spyglass/jdbc/db/DBOutputFormat.java b/src/main/java/parallelai/spyglass/jdbc/db/DBOutputFormat.java index 1166970..3f10a04 100644 --- a/src/main/java/parallelai/spyglass/jdbc/db/DBOutputFormat.java +++ b/src/main/java/parallelai/spyglass/jdbc/db/DBOutputFormat.java @@ -29,6 +29,14 @@ package parallelai.spyglass.jdbc.db; +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; @@ -39,353 +47,385 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.StringUtils; -import com.jcraft.jsch.Logger; - -import java.io.IOException; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; - /** - * A OutputFormat that sends the reduce output to a SQL table. <p/> {@link DBOutputFormat} accepts - * <key,value> pairs, where key has a type extending DBWritable. Returned {@link RecordWriter} - * writes <b>only the key</b> to the database with a batch SQL query. + * A OutputFormat that sends the reduce output to a SQL table. + * <p/> + * {@link DBOutputFormat} accepts <key,value> pairs, where key has a type + * extending DBWritable. Returned {@link RecordWriter} writes <b>only the + * key</b> to the database with a batch SQL query. 
*/ -public class DBOutputFormat<K extends DBWritable, V> implements OutputFormat<K, V> { - private static final Log LOG = LogFactory.getLog(DBOutputFormat.class); - - /** A RecordWriter that writes the reduce output to a SQL table */ - protected class DBRecordWriter implements RecordWriter<K, V> { - private Connection connection; - private PreparedStatement insertStatement; - private PreparedStatement updateStatement; - private final int statementsBeforeExecute; - - private long statementsAdded = 0; - private long insertStatementsCurrent = 0; - private long updateStatementsCurrent = 0; - - protected DBRecordWriter(Connection connection, PreparedStatement insertStatement, - PreparedStatement updateStatement, int statementsBeforeExecute) { - this.connection = connection; - this.insertStatement = insertStatement; - this.updateStatement = updateStatement; - this.statementsBeforeExecute = statementsBeforeExecute; - } - - /** {@inheritDoc} */ - public void close(Reporter reporter) throws IOException { - executeBatch(); - - try { - if (insertStatement != null) { insertStatement.close(); } - - if (updateStatement != null) { updateStatement.close(); } - - connection.commit(); - } catch (SQLException exception) { - rollBack(); - - createThrowMessage("unable to commit batch", 0, exception); - } finally { - try { - connection.close(); - } catch (SQLException exception) { - throw new IOException("unable to close connection", exception); - } - } - } - - private void executeBatch() throws IOException { - try { - if (insertStatementsCurrent != 0) { - LOG.info( - "executing insert batch " + createBatchMessage(insertStatementsCurrent)); - - insertStatement.executeBatch(); - } - - insertStatementsCurrent = 0; - } catch (SQLException exception) { - rollBack(); - - createThrowMessage("unable to execute insert batch", insertStatementsCurrent, exception); - } - - try { - if (updateStatementsCurrent != 0) { - LOG.info( - "executing update batch " + createBatchMessage(updateStatementsCurrent)); - - int[] result = updateStatement.executeBatch(); - - int count = 0; - - for (int value : result) { count += value; } - - if (count != updateStatementsCurrent) { - throw new IOException( - "update did not update same number of statements executed in batch, batch: " - + updateStatementsCurrent + " updated: " + count); - } - } - - updateStatementsCurrent = 0; - } catch (SQLException exception) { - - String message = exception.getMessage(); - if (message.indexOf("Duplicate Key") >= 0) { - LOG.warn("In exception block. 
Bypass exception becuase of Insert/Update."); - } else { - rollBack(); - - createThrowMessage("unable to execute update batch", updateStatementsCurrent, exception); - } - } - } - - private void rollBack() { - try { - connection.rollback(); - } catch (SQLException sqlException) { - LOG.warn(StringUtils.stringifyException(sqlException)); - } - } - - private String createBatchMessage(long currentStatements) { - return String - .format("[totstmts: %d][crntstmts: %d][batch: %d]", statementsAdded, currentStatements, statementsBeforeExecute); - } - - private void createThrowMessage(String stateMessage, long currentStatements, - SQLException exception) throws IOException { - String message = exception.getMessage(); - - message = message.substring(0, Math.min(75, message.length())); - - int messageLength = exception.getMessage().length(); - String batchMessage = createBatchMessage(currentStatements); - String template = "%s [msglength: %d]%s %s"; - String errorMessage = - String.format(template, stateMessage, messageLength, batchMessage, message); - - LOG.error(errorMessage, exception.getNextException()); - - throw new IOException(errorMessage, exception.getNextException()); - } - - /** {@inheritDoc} */ - public synchronized void write(K key, V value) throws IOException { - try { - if (value == null) { - key.write(insertStatement); - insertStatement.addBatch(); - insertStatementsCurrent++; - } else { - key.write(updateStatement); - updateStatement.addBatch(); - updateStatementsCurrent++; - } - } catch (SQLException exception) { - throw new IOException("unable to add batch statement", exception); - } - - statementsAdded++; - - if (statementsAdded % statementsBeforeExecute == 0) { executeBatch(); } - } - } - - /** - * Constructs the query used as the prepared statement to insert data. - * - * @param table the table to insert into - * @param fieldNames the fields to insert into. If field names are unknown, supply an array of - * nulls. 
- */ - protected String constructInsertQuery(String table, String[] fieldNames) { - if (fieldNames == null) { - throw new IllegalArgumentException("Field names may not be null"); - } - - StringBuilder query = new StringBuilder(); - - query.append("INSERT INTO ").append(table); - - if (fieldNames.length > 0 && fieldNames[0] != null) { - query.append(" ("); - - for (int i = 0; i < fieldNames.length; i++) { - query.append(fieldNames[i]); - - if (i != fieldNames.length - 1) { query.append(","); } - } - - query.append(")"); - - } - - query.append(" VALUES ("); - - for (int i = 0; i < fieldNames.length; i++) { - query.append("?"); - - if (i != fieldNames.length - 1) { query.append(","); } - } - - query.append(")"); - - boolean test = true; - if (test) { - query.append(" ON DUPLICATE KEY UPDATE "); - - - for (int i = 1; i < fieldNames.length; i++) { - - - if ( (i != 1) ) { query.append(","); } - //if (i != fieldNames.length - 1) { query.append(","); } - //&& (i != fieldNames.length - 1) - query.append(fieldNames[i]); - query.append(" = ?"); - - - } - } - - query.append(";"); - - LOG.info(" ===================== " + query.toString()); - return query.toString(); - } - - protected String constructUpdateQuery(String table, String[] fieldNames, String[] updateNames) { - if (fieldNames == null) { - throw new IllegalArgumentException("field names may not be null"); - } - - Set<String> updateNamesSet = new HashSet<String>(); - Collections.addAll(updateNamesSet, updateNames); - - StringBuilder query = new StringBuilder(); - - query.append("UPDATE ").append(table); - - query.append(" SET "); - - if (fieldNames.length > 0 && fieldNames[0] != null) { - int count = 0; - - for (int i = 0; i < fieldNames.length; i++) { - if (updateNamesSet.contains(fieldNames[i])) { continue; } - - if (count != 0) { query.append(","); } - - query.append(fieldNames[i]); - query.append(" = ?"); - - count++; - } - } - - query.append(" WHERE "); - - if (updateNames.length > 0 && updateNames[0] != null) { - for (int i = 0; i < updateNames.length; i++) { - query.append(updateNames[i]); - query.append(" = ?"); - - if (i != updateNames.length - 1) { query.append(" and "); } - } - } - - query.append(";"); - System.out.println("Update Query => " + query.toString()); - return query.toString(); - } - - /** {@inheritDoc} */ - public void checkOutputSpecs(FileSystem filesystem, JobConf job) throws IOException { - } - - /** {@inheritDoc} */ - public RecordWriter<K, V> getRecordWriter(FileSystem filesystem, JobConf job, String name, - Progressable progress) throws IOException { - DBConfiguration dbConf = new DBConfiguration(job); - - String tableName = dbConf.getOutputTableName(); - String[] fieldNames = dbConf.getOutputFieldNames(); - String[] updateNames = dbConf.getOutputUpdateFieldNames(); - int batchStatements = dbConf.getBatchStatementsNum(); - - Connection connection = dbConf.getConnection(); - - configureConnection(connection); - - String sqlInsert = constructInsertQuery(tableName, fieldNames); - PreparedStatement insertPreparedStatement; - - try { - insertPreparedStatement = connection.prepareStatement(sqlInsert); - insertPreparedStatement.setEscapeProcessing(true); // should be on by default - } catch (SQLException exception) { - throw new IOException("unable to create statement for: " + sqlInsert, exception); - } - - String sqlUpdate = - updateNames != null ? constructUpdateQuery(tableName, fieldNames, updateNames) : null; - PreparedStatement updatePreparedStatement = null; - - try { - updatePreparedStatement = - sqlUpdate != null ? 
connection.prepareStatement(sqlUpdate) : null; - } catch (SQLException exception) { - throw new IOException("unable to create statement for: " + sqlUpdate, exception); - } - - return new DBRecordWriter(connection, insertPreparedStatement, updatePreparedStatement, batchStatements); - } - - protected void configureConnection(Connection connection) { - setAutoCommit(connection); - } - - protected void setAutoCommit(Connection connection) { - try { - connection.setAutoCommit(false); - } catch (Exception exception) { - throw new RuntimeException("unable to set auto commit", exception); - } - } - - /** - * Initializes the reduce-part of the job with the appropriate output settings - * - * @param job The job - * @param dbOutputFormatClass - * @param tableName The table to insert data into - * @param fieldNames The field names in the table. If unknown, supply the appropriate - */ - public static void setOutput(JobConf job, Class<? extends DBOutputFormat> dbOutputFormatClass, - String tableName, String[] fieldNames, String[] updateFields, int batchSize) { - if (dbOutputFormatClass == null) { job.setOutputFormat(DBOutputFormat.class); } else { - job.setOutputFormat(dbOutputFormatClass); - } - - // writing doesn't always happen in reduce - job.setReduceSpeculativeExecution(false); - job.setMapSpeculativeExecution(false); - - DBConfiguration dbConf = new DBConfiguration(job); - - dbConf.setOutputTableName(tableName); - dbConf.setOutputFieldNames(fieldNames); - - if (updateFields != null) { dbConf.setOutputUpdateFieldNames(updateFields); } - - if (batchSize != -1) { dbConf.setBatchStatementsNum(batchSize); } - } +public class DBOutputFormat<K extends DBWritable, V> implements + OutputFormat<K, V> { + private static final Log LOG = LogFactory.getLog(DBOutputFormat.class); + + /** A RecordWriter that writes the reduce output to a SQL table */ + protected class DBRecordWriter implements RecordWriter<K, V> { + private Connection connection; + private PreparedStatement insertStatement; + private PreparedStatement updateStatement; + private final int statementsBeforeExecute; + + private long statementsAdded = 0; + private long insertStatementsCurrent = 0; + private long updateStatementsCurrent = 0; + + protected DBRecordWriter(Connection connection, + PreparedStatement insertStatement, + PreparedStatement updateStatement, int statementsBeforeExecute) { + this.connection = connection; + this.insertStatement = insertStatement; + this.updateStatement = updateStatement; + this.statementsBeforeExecute = statementsBeforeExecute; + } + + /** {@inheritDoc} */ + public void close(Reporter reporter) throws IOException { + executeBatch(); + + try { + if (insertStatement != null) { + insertStatement.close(); + } + + if (updateStatement != null) { + updateStatement.close(); + } + + connection.commit(); + } catch (SQLException exception) { + rollBack(); + + createThrowMessage("unable to commit batch", 0, exception); + } finally { + try { + connection.close(); + } catch (SQLException exception) { + throw new IOException("unable to close connection", exception); + } + } + } + + private void executeBatch() throws IOException { + try { + if (insertStatementsCurrent != 0) { + LOG.info("executing insert batch " + + createBatchMessage(insertStatementsCurrent)); + + insertStatement.executeBatch(); + } + + insertStatementsCurrent = 0; + } catch (SQLException exception) { + rollBack(); + + createThrowMessage("unable to execute insert batch", + insertStatementsCurrent, exception); + } + + try { + if (updateStatementsCurrent != 0) { 
+ LOG.info("executing update batch " + + createBatchMessage(updateStatementsCurrent)); + + int[] result = updateStatement.executeBatch(); + + int count = 0; + + for (int value : result) { + count += value; + } + + if (count != updateStatementsCurrent) { + throw new IOException( + "update did not update same number of statements executed in batch, batch: " + + updateStatementsCurrent + " updated: " + count); + } + } + + updateStatementsCurrent = 0; + } catch (SQLException exception) { + + String message = exception.getMessage(); + if (message.indexOf("Duplicate Key") >= 0) { + LOG.warn("In exception block. Bypass exception becuase of Insert/Update."); + } else { + rollBack(); + + createThrowMessage("unable to execute update batch", + updateStatementsCurrent, exception); + } + } + } + + private void rollBack() { + try { + connection.rollback(); + } catch (SQLException sqlException) { + LOG.warn(StringUtils.stringifyException(sqlException)); + } + } + + private String createBatchMessage(long currentStatements) { + return String.format("[totstmts: %d][crntstmts: %d][batch: %d]", + statementsAdded, currentStatements, statementsBeforeExecute); + } + + private void createThrowMessage(String stateMessage, + long currentStatements, SQLException exception) throws IOException { + String message = exception.getMessage(); + + // message = message.substring(0, Math.min(75, message.length())); + + int messageLength = exception.getMessage().length(); + String batchMessage = createBatchMessage(currentStatements); + String template = "%s [msglength: %d]%s %s"; + String errorMessage = String.format(template, stateMessage, + messageLength, batchMessage, message); + + LOG.error(errorMessage, exception.getNextException()); + + throw new IOException(errorMessage, exception.getNextException()); + } + + /** {@inheritDoc} */ + public synchronized void write(K key, V value) throws IOException { + try { + if (value == null) { + key.write(insertStatement); + insertStatement.addBatch(); + insertStatementsCurrent++; + } else { + key.write(updateStatement); + updateStatement.addBatch(); + updateStatementsCurrent++; + } + } catch (SQLException exception) { + throw new IOException("unable to add batch statement", exception); + } + + statementsAdded++; + + if (statementsAdded % statementsBeforeExecute == 0) { + executeBatch(); + } + } + } + + /** + * Constructs the query used as the prepared statement to insert data. + * + * @param table + * the table to insert into + * @param fieldNames + * the fields to insert into. If field names are unknown, supply an + * array of nulls. 
+ */ + protected String constructInsertQuery(String table, String[] fieldNames) { + if (fieldNames == null) { + throw new IllegalArgumentException("Field names may not be null"); + } + + StringBuilder query = new StringBuilder(); + + query.append("INSERT INTO ").append(table); + + if (fieldNames.length > 0 && fieldNames[0] != null) { + query.append(" ("); + + for (int i = 0; i < fieldNames.length; i++) { + query.append(fieldNames[i]); + + if (i != fieldNames.length - 1) { + query.append(","); + } + } + + query.append(")"); + + } + + query.append(" VALUES ("); + + for (int i = 0; i < fieldNames.length; i++) { + query.append("?"); + + if (i != fieldNames.length - 1) { + query.append(","); + } + } + + query.append(")"); + + boolean test = true; + if (test) { + query.append(" ON DUPLICATE KEY UPDATE "); + + for (int i = 1; i < fieldNames.length; i++) { + + if ((i != 1)) { + query.append(","); + } + // if (i != fieldNames.length - 1) { query.append(","); } + // && (i != fieldNames.length - 1) + query.append(fieldNames[i]); + query.append(" = ?"); + + } + } + + query.append(";"); + + LOG.info(" ===================== " + query.toString()); + return query.toString(); + } + + protected String constructUpdateQuery(String table, String[] fieldNames, + String[] updateNames) { + if (fieldNames == null) { + throw new IllegalArgumentException("field names may not be null"); + } + + Set<String> updateNamesSet = new HashSet<String>(); + Collections.addAll(updateNamesSet, updateNames); + + StringBuilder query = new StringBuilder(); + + query.append("UPDATE ").append(table); + + query.append(" SET "); + + if (fieldNames.length > 0 && fieldNames[0] != null) { + int count = 0; + + for (int i = 0; i < fieldNames.length; i++) { + if (updateNamesSet.contains(fieldNames[i])) { + continue; + } + + if (count != 0) { + query.append(","); + } + + query.append(fieldNames[i]); + query.append(" = ?"); + + count++; + } + } + + query.append(" WHERE "); + + if (updateNames.length > 0 && updateNames[0] != null) { + for (int i = 0; i < updateNames.length; i++) { + query.append(updateNames[i]); + query.append(" = ?"); + + if (i != updateNames.length - 1) { + query.append(" and "); + } + } + } + + query.append(";"); + System.out.println("Update Query => " + query.toString()); + return query.toString(); + } + + /** {@inheritDoc} */ + public void checkOutputSpecs(FileSystem filesystem, JobConf job) + throws IOException { + } + + /** {@inheritDoc} */ + public RecordWriter<K, V> getRecordWriter(FileSystem filesystem, + JobConf job, String name, Progressable progress) throws IOException { + DBConfiguration dbConf = new DBConfiguration(job); + + String tableName = dbConf.getOutputTableName(); + String[] fieldNames = dbConf.getOutputFieldNames(); + String[] updateNames = dbConf.getOutputUpdateFieldNames(); + int batchStatements = dbConf.getBatchStatementsNum(); + + Connection connection = dbConf.getConnection(); + + configureConnection(connection); + + String sqlInsert = constructInsertQuery(tableName, fieldNames); + PreparedStatement insertPreparedStatement; + + try { + insertPreparedStatement = connection.prepareStatement(sqlInsert); + insertPreparedStatement.setEscapeProcessing(true); // should be on by + // default + } catch (SQLException exception) { + throw new IOException("unable to create statement for: " + sqlInsert, + exception); + } + + String sqlUpdate = updateNames != null ? 
+  /** {@inheritDoc} */
+  public void checkOutputSpecs(FileSystem filesystem, JobConf job)
+      throws IOException {
+  }
+
+  /** {@inheritDoc} */
+  public RecordWriter<K, V> getRecordWriter(FileSystem filesystem,
+      JobConf job, String name, Progressable progress) throws IOException {
+    DBConfiguration dbConf = new DBConfiguration(job);
+
+    String tableName = dbConf.getOutputTableName();
+    String[] fieldNames = dbConf.getOutputFieldNames();
+    String[] updateNames = dbConf.getOutputUpdateFieldNames();
+    int batchStatements = dbConf.getBatchStatementsNum();
+
+    Connection connection = dbConf.getConnection();
+
+    configureConnection(connection);
+
+    String sqlInsert = constructInsertQuery(tableName, fieldNames);
+    PreparedStatement insertPreparedStatement;
+
+    try {
+      insertPreparedStatement = connection.prepareStatement(sqlInsert);
+      insertPreparedStatement.setEscapeProcessing(true); // should be on by default
+    } catch (SQLException exception) {
+      throw new IOException("unable to create statement for: " + sqlInsert,
+          exception);
+    }
+
+    String sqlUpdate = updateNames != null ? constructUpdateQuery(tableName,
+        fieldNames, updateNames) : null;
+    PreparedStatement updatePreparedStatement = null;
+
+    try {
+      updatePreparedStatement = sqlUpdate != null ? connection
+          .prepareStatement(sqlUpdate) : null;
+    } catch (SQLException exception) {
+      throw new IOException("unable to create statement for: " + sqlUpdate,
+          exception);
+    }
+
+    return new DBRecordWriter(connection, insertPreparedStatement,
+        updatePreparedStatement, batchStatements);
+  }
+
+  protected void configureConnection(Connection connection) {
+    setAutoCommit(connection);
+  }
+
+  protected void setAutoCommit(Connection connection) {
+    try {
+      connection.setAutoCommit(false);
+    } catch (Exception exception) {
+      throw new RuntimeException("unable to set auto commit", exception);
+    }
+  }
+
+  /**
+   * Initializes the reduce-part of the job with the appropriate output
+   * settings
+   *
+   * @param job
+   *          The job
+   * @param dbOutputFormatClass
+   * @param tableName
+   *          The table to insert data into
+   * @param fieldNames
+   *          The field names in the table. If unknown, supply the appropriate
+   *          number of nulls.
+   */
+  public static void setOutput(JobConf job,
+      Class<? extends DBOutputFormat> dbOutputFormatClass, String tableName,
+      String[] fieldNames, String[] updateFields, int batchSize) {
+    if (dbOutputFormatClass == null) {
+      job.setOutputFormat(DBOutputFormat.class);
+    } else {
+      job.setOutputFormat(dbOutputFormatClass);
+    }
+
+    // writing doesn't always happen in reduce
+    job.setReduceSpeculativeExecution(false);
+    job.setMapSpeculativeExecution(false);
+
+    DBConfiguration dbConf = new DBConfiguration(job);
+
+    dbConf.setOutputTableName(tableName);
+    dbConf.setOutputFieldNames(fieldNames);
+
+    if (updateFields != null) {
+      dbConf.setOutputUpdateFieldNames(updateFields);
+    }
+
+    if (batchSize != -1) {
+      dbConf.setBatchStatementsNum(batchSize);
+    }
+  }
 }
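(Editor's illustration, not part of the patch.) A minimal sketch of how driver code might wire up this output format, assuming the JDBC driver, URL and credentials have already been placed on the JobConf through this package's DBConfiguration; the table name, column names and batch size below are hypothetical:

import org.apache.hadoop.mapred.JobConf
import parallelai.spyglass.jdbc.db.DBOutputFormat

object JdbcUpsertSetup {
  def configure(job: JobConf): Unit = {
    // Upsert into a hypothetical "scores" table, executing 1000 statements per batch.
    // Passing null for dbOutputFormatClass makes setOutput fall back to DBOutputFormat itself.
    DBOutputFormat.setOutput(job, null, "scores",
      Array("id", "name", "score"), // fieldNames; index 0 acts as the key column in the upsert
      Array("id"),                  // updateFields; used in the WHERE clause of the UPDATE statement
      1000)                         // batchSize; pass -1 to keep the configured default
  }
}

setOutput also disables speculative execution for both map and reduce tasks, presumably because re-run attempts would otherwise replay the same JDBC batches.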
diff --git a/src/main/scala/parallelai/spyglass/hbase/HBaseRawSource.scala b/src/main/scala/parallelai/spyglass/hbase/HBaseRawSource.scala
index 6216695..450a57d 100644
--- a/src/main/scala/parallelai/spyglass/hbase/HBaseRawSource.scala
+++ b/src/main/scala/parallelai/spyglass/hbase/HBaseRawSource.scala
@@ -1,83 +1,83 @@
-//package parallelai.spyglass.hbase
-//
-//import cascading.pipe.Pipe
-//import cascading.pipe.assembly.Coerce
-//import cascading.scheme.Scheme
-//import cascading.tap.{ Tap, SinkMode }
-//import cascading.tuple.Fields
-//import org.apache.hadoop.mapred.{ RecordReader, OutputCollector, JobConf }
-//import org.apache.hadoop.hbase.util.Bytes
-//import scala.collection.JavaConversions._
-//import scala.collection.mutable.WrappedArray
-//import com.twitter.scalding._
-//import org.apache.hadoop.hbase.io.ImmutableBytesWritable
-//import org.apache.hadoop.hbase.client.Scan
-//import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil
-//import org.apache.hadoop.hbase.util.Base64
-//import java.io.ByteArrayOutputStream
-//import java.io.DataOutputStream
-//
-//object HBaseRawSource {
-//  /**
-//   * Converts a scan object to a base64 string that can be passed to HBaseRawSource
-//   * @param scan
-//   * @return base64 string representation
-//   */
-//  def convertScanToString(scan: Scan) = {
-//    val out = new ByteArrayOutputStream();
-//    val dos = new DataOutputStream(out);
-//    scan.write(dos);
-//    Base64.encodeBytes(out.toByteArray());
-//  }
-//}
-//
-//
-///**
-// * @author Rotem Hermon
-// *
-// * HBaseRawSource is a scalding source that passes the original row (Result) object to the
-// * mapper for customized processing.
-// *
-// * @param tableName The name of the HBase table to read
-// * @param quorumNames HBase quorum
-// * @param familyNames Column families to get (source, if null will get all) or update to (sink)
-// * @param writeNulls Should the sink write null values. default = true. If false, null columns will not be written
-// * @param base64Scan An optional base64 encoded scan object
-// * @param sinkMode If REPLACE the output table will be deleted before writing to
-// *
-// */
-//class HBaseRawSource(
-//  tableName: String,
-//  quorumNames: String = "localhost",
-//  familyNames: Array[String],
-//  writeNulls: Boolean = true,
-//  base64Scan: String = null,
-//  sinkMode: SinkMode = null) extends Source {
-//
-//  override val hdfsScheme = new HBaseRawScheme(familyNames, writeNulls)
-//    .asInstanceOf[Scheme[JobConf, RecordReader[_, _], OutputCollector[_, _], _, _]]
-//
-//  override def createTap(readOrWrite: AccessMode)(implicit mode: Mode): Tap[_, _, _] = {
-//    val hBaseScheme = hdfsScheme match {
-//      case hbase: HBaseRawScheme => hbase
-//      case _ => throw new ClassCastException("Failed casting from Scheme to HBaseRawScheme")
-//    }
-//    mode match {
-//      case hdfsMode @ Hdfs(_, _) => readOrWrite match {
-//        case Read => {
-//          new HBaseRawTap(quorumNames, tableName, hBaseScheme, base64Scan, sinkMode match {
-//            case null => SinkMode.KEEP
-//            case _ => sinkMode
-//          }).asInstanceOf[Tap[_, _, _]]
-//        }
-//        case Write => {
-//          new HBaseRawTap(quorumNames, tableName, hBaseScheme, base64Scan, sinkMode match {
-//            case null => SinkMode.UPDATE
-//            case _ => sinkMode
-//          }).asInstanceOf[Tap[_, _, _]]
-//        }
-//      }
-//      case _ => super.createTap(readOrWrite)(mode)
-//    }
-//  }
-//}
+package parallelai.spyglass.hbase
+
+import cascading.pipe.Pipe
+import cascading.pipe.assembly.Coerce
+import cascading.scheme.Scheme
+import cascading.tap.{ Tap, SinkMode }
+import cascading.tuple.Fields
+import org.apache.hadoop.mapred.{ RecordReader, OutputCollector, JobConf }
+import org.apache.hadoop.hbase.util.Bytes
+import scala.collection.JavaConversions._
+import scala.collection.mutable.WrappedArray
+import com.twitter.scalding._
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil
+import org.apache.hadoop.hbase.util.Base64
+import java.io.ByteArrayOutputStream
+import java.io.DataOutputStream
+
+object HBaseRawSource {
+  /**
+   * Converts a scan object to a base64 string that can be passed to HBaseRawSource
+   * @param scan
+   * @return base64 string representation
+   */
+  def convertScanToString(scan: Scan) = {
+    val out = new ByteArrayOutputStream();
+    val dos = new DataOutputStream(out);
+    scan.write(dos);
+    Base64.encodeBytes(out.toByteArray());
+  }
+}
+
+
+/**
+* @author Rotem Hermon
+*
+* HBaseRawSource is a scalding source that passes the original row (Result) object to the
+* mapper for customized processing.
+*
+* @param tableName The name of the HBase table to read
+* @param quorumNames HBase quorum
+* @param familyNames Column families to get (source, if null will get all) or update to (sink)
+* @param writeNulls Should the sink write null values. default = true. If false, null columns will not be written
+* @param base64Scan An optional base64 encoded scan object
+* @param sinkMode If REPLACE the output table will be deleted before writing to it
+*
+*/
+class HBaseRawSource(
+  tableName: String,
+  quorumNames: String = "localhost",
+  familyNames: Array[String],
+  writeNulls: Boolean = true,
+  base64Scan: String = null,
+  sinkMode: SinkMode = null) extends Source {
+
+  override val hdfsScheme = new HBaseRawScheme(familyNames, writeNulls)
+    .asInstanceOf[Scheme[JobConf, RecordReader[_, _], OutputCollector[_, _], _, _]]
+
+  override def createTap(readOrWrite: AccessMode)(implicit mode: Mode): Tap[_, _, _] = {
+    val hBaseScheme = hdfsScheme match {
+      case hbase: HBaseRawScheme => hbase
+      case _ => throw new ClassCastException("Failed casting from Scheme to HBaseRawScheme")
+    }
+    mode match {
+      case hdfsMode @ Hdfs(_, _) => readOrWrite match {
+        case Read => {
+          new HBaseRawTap(quorumNames, tableName, hBaseScheme, base64Scan, sinkMode match {
+            case null => SinkMode.KEEP
+            case _ => sinkMode
+          }).asInstanceOf[Tap[_, _, _]]
+        }
+        case Write => {
+          new HBaseRawTap(quorumNames, tableName, hBaseScheme, base64Scan, sinkMode match {
+            case null => SinkMode.UPDATE
+            case _ => sinkMode
+          }).asInstanceOf[Tap[_, _, _]]
+        }
+      }
+      case _ => super.createTap(readOrWrite)(mode)
+    }
+  }
+}
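(Editor's illustration, not part of the patch.) A minimal sketch of the re-enabled source in a Scalding job, narrowing the read with a Scan passed through convertScanToString. The table, quorum and column family names are hypothetical, and the field names emitted by HBaseRawScheme ('rowkey and 'row below) are assumed rather than taken from this diff:

import com.twitter.scalding._
import org.apache.hadoop.hbase.client.{ Result, Scan }
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import parallelai.spyglass.hbase.HBaseRawSource

class RawRowSizeJob(args: Args) extends Job(args) {
  // Restrict the read to a single column family and ship the Scan to the tap as base64.
  val scan = new Scan()
  scan.addFamily(Bytes.toBytes("data"))
  scan.setCaching(500)

  new HBaseRawSource("demo_table",
      quorumNames = "zk-host1,zk-host2",
      familyNames = Array("data"),
      base64Scan = HBaseRawSource.convertScanToString(scan))
    .read
    .mapTo(('rowkey, 'row) -> ('key, 'cellCount)) { in: (ImmutableBytesWritable, Result) =>
      // The raw Result is handed to the job untouched; counting its cells is a trivial example.
      (Bytes.toString(in._1.get()), in._2.size())
    }
    .write(Tsv(args("output")))
}

Because no sinkMode is given, the tap defaults to SinkMode.KEEP for reads and SinkMode.UPDATE for writes, as in createTap above.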