aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonios Chalkiopoulos <Antwnis@gmail.com>2014-08-22 19:09:55 +0100
committerAntonios Chalkiopoulos <Antwnis@gmail.com>2014-08-22 19:09:55 +0100
commit1ae61f8fd770acd19aa58133618b0d98a56cfcf2 (patch)
tree8e58798045897644828732660adcff71eb1479a8
parent83af28c8aa3ba707340b752dd7b619c92d1c5067 (diff)
downloadSpyGlass-1ae61f8fd770acd19aa58133618b0d98a56cfcf2.tar.gz
SpyGlass-1ae61f8fd770acd19aa58133618b0d98a56cfcf2.zip
Preparation for CDH5.1 - HBase 0.98.1
-rw-r--r--pom.xml22
-rw-r--r--src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java56
-rw-r--r--src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java2
-rw-r--r--src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java8
-rw-r--r--src/main/java/parallelai/spyglass/hbase/HBaseTap.java2
-rw-r--r--src/main/scala/parallelai/spyglass/hbase/example/HBaseExample.scala7
-rw-r--r--src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala11
7 files changed, 53 insertions, 55 deletions
diff --git a/pom.xml b/pom.xml
index b67fdc9..d2c4f83 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,11 +29,11 @@
<maven-scala-plugin.version>2.15.2</maven-scala-plugin.version>
<maven.surefire.plugin.version>2.12.3</maven.surefire.plugin.version>
- <cdh.version>cdh4.5.0</cdh.version>
+ <cdh.version>cdh5.1.0</cdh.version>
- <hadoop.version>2.0.0-${cdh.version}</hadoop.version>
- <hadoop.core.version>2.0.0-mr1-${cdh.version}</hadoop.core.version>
- <hbase.version>0.94.6-${cdh.version}</hbase.version>
+ <hadoop.version>2.3.0-${cdh.version}</hadoop.version>
+ <hadoop.core.version>2.3.0-mr1-${cdh.version}</hadoop.core.version>
+ <hbase.version>0.98.1-${cdh.version}</hbase.version>
<!-- Scala/Scalding/Cascading properties -->
<!-- can be 2.9.3 and 2.10.2 -->
@@ -63,7 +63,7 @@
<repository>
<id>cloudera-cdh4</id>
<name>Cloudera CDH4</name>
- <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+ <url>http://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>conjars</id>
@@ -153,7 +153,17 @@
<!-- HBase -->
<dependency>
<groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <version>${hbase.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java b/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java
index 332bbd7..7b61047 100644
--- a/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java
+++ b/src/main/java/parallelai/spyglass/hbase/HBaseInputFormatGranular.java
@@ -14,7 +14,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Addressing;
@@ -110,26 +109,26 @@ public class HBaseInputFormatGranular extends HBaseInputFormatBase {
: maxKey;
HRegionLocation regionLoc = table.getRegionLocation(keys.getFirst()[i]);
- HServerAddress regionServerAddress = regionLoc.getServerAddress();
- InetAddress regionAddress = regionServerAddress.getInetSocketAddress().getAddress();
- String regionLocation;
- try {
- regionLocation = reverseDNS(regionAddress);
- } catch (NamingException e) {
- LOG.error("Cannot resolve the host name for " + regionAddress
- + " because of " + e);
- regionLocation = regionServerAddress.getHostname();
- }
+ //HServerAddress regionServerAddress = regionLoc.getServerAddress();
+// InetAddress regionAddress = regionServerAddress.getInetSocketAddress().getAddress();
+// String regionLocation;
+// try {
+// regionLocation = reverseDNS(regionAddress);
+// } catch (NamingException e) {
+// LOG.error("Cannot resolve the host name for " + regionAddress
+// + " because of " + e);
+// regionLocation = regionLoc.getHostname();
+// }
regionNames[i] = regionLoc.getRegionInfo().getRegionNameAsString();
- LOG.debug("***** " + regionLocation);
+ LOG.debug("***** " + regionLoc.getHostname());
- if (regionLocation == null || regionLocation.length() == 0)
- throw new IOException("The region info for regiosn " + i
+ if (regionLoc.getHostname() == null || regionLoc.getHostname().length() == 0)
+ throw new IOException("The region info for region " + i
+ " is null or empty");
- regions[i] = regionLocation;
+ regions[i] = regionLoc.getHostname();
LOG.debug(String.format(
"Region (%s) has start key (%s) and stop key (%s)", regions[i],
@@ -189,18 +188,19 @@ public class HBaseInputFormatGranular extends HBaseInputFormatBase {
byte[] rStart = cRegion.getRegionInfo().getStartKey();
byte[] rStop = cRegion.getRegionInfo().getEndKey();
- HServerAddress regionServerAddress = cRegion
- .getServerAddress();
- InetAddress regionAddress = regionServerAddress
- .getInetSocketAddress().getAddress();
- String regionLocation;
- try {
- regionLocation = reverseDNS(regionAddress);
- } catch (NamingException e) {
- LOG.error("Cannot resolve the host name for "
- + regionAddress + " because of " + e);
- regionLocation = regionServerAddress.getHostname();
- }
+// HServerAddress regionServerAddress = cRegion
+// .getServerAddress();
+// InetAddress regionAddress = regionServerAddress
+// .getInetSocketAddress().getAddress();
+// String regionLocation;
+// try {
+// regionLocation = reverseDNS(regionAddress);
+// regionLocation = cRegion.
+// } catch (NamingException e) {
+// LOG.error("Cannot resolve the host name for "
+// + regionAddress + " because of " + e);
+// regionLocation = cRegion.getHostname();
+// }
String regionName = cRegion.getRegionInfo().getRegionNameAsString();
@@ -219,7 +219,7 @@ public class HBaseInputFormatGranular extends HBaseInputFormatBase {
.compareTo(stopRow, rStop) >= 0)), rStop.length));
HBaseTableSplitGranular split = new HBaseTableSplitGranular(
- table.getTableName(), sStart, sStop, regionLocation, regionName,
+ table.getTableName(), sStart, sStop, cRegion.getHostname(), regionName,
SourceMode.SCAN_RANGE, useSalt);
split.setEndRowInclusive(currentRegion == maxRegions);
diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java
index efe548d..ece7e61 100644
--- a/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java
+++ b/src/main/java/parallelai/spyglass/hbase/HBaseRawTap.java
@@ -156,7 +156,7 @@ public class HBaseRawTap extends Tap<JobConf, RecordReader, OutputCollector> {
return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_"));
}
- protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException {
+ protected HBaseAdmin getHBaseAdmin(JobConf conf) throws IOException {
if (hBaseAdmin == null) {
Configuration hbaseConf = HBaseConfiguration.create(conf);
hBaseAdmin = new HBaseAdmin(hbaseConf);
diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java b/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java
index b34ed3f..9ac9c8f 100644
--- a/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java
+++ b/src/main/java/parallelai/spyglass/hbase/HBaseRecordReaderGranular.java
@@ -251,7 +251,7 @@ public class HBaseRecordReaderGranular extends HBaseRecordReaderBase {
}
lastSuccessfulRow = key.get();
- Writables.copyWritable(result, value);
+ value.copyFrom(result);
return true;
}
return false;
@@ -302,7 +302,7 @@ public class HBaseRecordReaderGranular extends HBaseRecordReaderBase {
key.set(result.getRow());
}
lastSuccessfulRow = key.get();
- Writables.copyWritable(result, value);
+ value.copyFrom(result);
return true;
} else {
@@ -352,7 +352,7 @@ public class HBaseRecordReaderGranular extends HBaseRecordReaderBase {
key.set(result.getRow());
}
lastSuccessfulRow = key.get();
- Writables.copyWritable(result, value);
+ value.copyFrom(result);
return true;
} else {
@@ -408,7 +408,7 @@ public class HBaseRecordReaderGranular extends HBaseRecordReaderBase {
key.set(result.getRow());
}
lastSuccessfulRow = key.get();
- Writables.copyWritable(result, value);
+ value.copyFrom(result);
return true;
} else {
LOG.debug(String.format("+ Key (%s) return an EMPTY result. Get (%s)", Bytes.toString(nextKey), theGet.getId()) ); //alg0
diff --git a/src/main/java/parallelai/spyglass/hbase/HBaseTap.java b/src/main/java/parallelai/spyglass/hbase/HBaseTap.java
index bfe6670..0b1ee98 100644
--- a/src/main/java/parallelai/spyglass/hbase/HBaseTap.java
+++ b/src/main/java/parallelai/spyglass/hbase/HBaseTap.java
@@ -138,7 +138,7 @@ public class HBaseTap extends Tap<JobConf, RecordReader, OutputCollector> {
return new Path(SCHEME + ":/" + tableName.replaceAll(":", "_"));
}
- protected HBaseAdmin getHBaseAdmin(JobConf conf) throws MasterNotRunningException, ZooKeeperConnectionException {
+ protected HBaseAdmin getHBaseAdmin(JobConf conf) throws IOException {
if (hBaseAdmin == null) {
Configuration hbaseConf = HBaseConfiguration.create(conf);
hBaseAdmin = new HBaseAdmin(hbaseConf);
diff --git a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExample.scala b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExample.scala
index 13c75d6..d8889c2 100644
--- a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExample.scala
+++ b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExample.scala
@@ -1,10 +1,5 @@
package parallelai.spyglass.hbase.example
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.HBaseConfiguration
-import org.apache.hadoop.hbase.client.HConnectionManager
-import org.apache.hadoop.hbase.client.HTable
-import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.Level
import org.apache.log4j.Logger
import com.twitter.scalding._
@@ -12,8 +7,6 @@ import com.twitter.scalding.Args
import parallelai.spyglass.base.JobBase
import parallelai.spyglass.hbase.HBaseSource
import parallelai.spyglass.hbase.HBaseConstants.SourceMode
-import org.apache.hadoop.hbase.client.Put
-import parallelai.spyglass.hbase.HBaseSalter
class HBaseExample(args: Args) extends JobBase(args) {
diff --git a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala
index c503247..b8f7da4 100644
--- a/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala
+++ b/src/main/scala/parallelai/spyglass/hbase/example/HBaseExampleRunner.scala
@@ -1,14 +1,9 @@
package parallelai.spyglass.hbase.example
-import com.twitter.scalding.Tool
-import org.joda.time.format.DateTimeFormat
-import java.util.Formatter.DateTime
import parallelai.spyglass.base.JobRunner
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, HBaseConfiguration}
-import org.apache.hadoop.hbase.client.{Put, HTable, HConnectionManager, HBaseAdmin}
-import org.apache.hadoop.hbase.io.hfile.Compression
-import org.apache.hadoop.hbase.regionserver.StoreFile
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.client.{Put, HTable, HConnectionManager}
import org.apache.hadoop.hbase.util.Bytes
import parallelai.spyglass.hbase.HBaseSalter
@@ -71,4 +66,4 @@ object HBaseExampleRunner extends App {
"--output", output, "--debug", "true", "--job.lib.path", jobLibPath, "--quorum", quorum))
-} \ No newline at end of file
+}