aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.md75
-rw-r--r--pom.xml56
-rw-r--r--src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala6
-rw-r--r--src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala6
4 files changed, 89 insertions, 54 deletions
diff --git a/README.md b/README.md
index beaa4a4..6df2fb8 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,13 @@ Cascading and Scalding wrapper for HBase with advanced read and write features.
Prevent Hot Spotting by the use of transparent key prefixes.
+Changes
+=======
+- Added JDBC Tap Functionality
+- Added Delete Functionality
+- Added Region grouping of splits
+- Migrated maven repo to local subdirectory. See below.
+
Building
========
@@ -17,7 +24,7 @@ To use SpyGlass as a dependency use the following repository
<repositories>
<repository>
<id>parallelai-releases</id>
- <url>https://github.com/ParallelAI/mvn-repo/raw/master/releases</url>
+ <url>https://github.com/ParallelAI/SpyGlass/raw/master/releases</url>
</repository>
</repositories>
@@ -270,4 +277,68 @@ e.g.
6. Jdbc Tap and Source
===========================
-To be added soon. \ No newline at end of file
+To be added soon.
+
+e.g.
+ val jdbcSourceRead = new JDBCSource(
+ "TABLE_01",
+ "com.mysql.jdbc.Driver",
+ "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
+ "root",
+ "password",
+ List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
+ List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
+ List("id"),
+ new Fields("key", "column1", "column2", "column3"),
+ null, null, null
+ )
+
+ val jdbcSourceWrite = new JDBCSource(
+ "TABLE_01",
+ "com.mysql.jdbc.Driver",
+ "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
+ "root",
+ "password",
+ List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
+ List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
+ List("id"),
+ new Fields("key", "column1", "column2", "column3"),
+ null, null, null
+ )
+
+
+7. HBase Delete Functionality
+=============================
+
+Delete functionality has been added to SpyGlass version 4.1.0 onwards. Below is an example of how to use it.
+
+The feature can be enable by using the parameter sinkMode = SinkMode.REPLACE in the HBaseSource
+You can use sinkMode = SinkMode.UPDATE to add to the HBaseTable as usual.
+
+e.g.
+ val eraser = toIBW(input, TABLE_SCHEMA)
+ .write(new HBaseSource( "_TEST.SALT.01", quorum, 'key,
+ TABLE_SCHEMA.tail.map((x: Symbol) => "data"),
+ TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)), sinkMode = SinkMode.REPLACE ))
+
+All rows with the key will be deleted. This includes all versions too.
+
+8. Regional Split Group in Scan and Get
+=======================================
+
+This functionality reduces the number of mappers significantly when using hot spot prevention with prefixes.
+
+This feature can be activated by using inputSplitType = SplitType.REGIONAL
+You can use inputSplitType = SplitType.GRANULAR to use the previous functionality as is.
+
+e.g.
+ val hbase04 = new HBaseSource( "_TEST.SALT.01", quorum, 'key,
+ TABLE_SCHEMA.tail.map((x: Symbol) => "data"),
+ TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)),
+ sourceMode = SourceMode.SCAN_RANGE, startKey = sttKeyP, stopKey = stpKeyP,
+ inputSplitType = splitType).read
+ .fromBytesWritable(TABLE_SCHEMA )
+ .map(('key, 'salted, 'unsalted) -> 'testData) {x: (String, String, String) => List(x._1, x._2, x._3)}
+ .project('testData)
+ .write(TextLine("saltTesting/ScanRangeNoSalt01"))
+ .groupAll(group => group.toList[List[List[String]]]('testData -> 'testData)) \ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 066632c..c1c6ebc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -41,8 +41,10 @@
<zookeeper.version>3.4.5-${cdh.version}</zookeeper.version>
<!-- Scala/Scalding/Cascading properties -->
- <scala.version>2.10.2</scala.version>
- <scalding.scala.version>2.10</scalding.scala.version>
+ <!-- can be 2.9.3 and 2.10.2 -->
+ <scala.version>2.9.3</scala.version>
+ <!-- 2.10 for Scala 2.10.2 and 2.9.3 for Scala version 2.9.3 -->
+ <scalding.scala.version>2.9.3</scalding.scala.version>
<scalding.version>0.8.6</scalding.version>
<cascading.version>2.1.6</cascading.version>
<scalding-commons.version>0.2.0</scalding-commons.version>
@@ -50,7 +52,8 @@
<trove4j.version>3.0.3</trove4j.version>
<maple.version>0.2.8</maple.version>
- <specs2.version>2.1.1</specs2.version>
+ <!-- 2.1.1 for Scala 2.10.2 and 1.12.4.1 for Scala 2.9.3-->
+ <specs2.version>1.12.4.1</specs2.version>
<typesafe.config.version>1.0.0</typesafe.config.version>
<!-- Other libraries properties -->
@@ -66,18 +69,9 @@
<name>Cascading and Scalding wrapper for HBase with advanced features</name>
<groupId>parallelai</groupId>
<artifactId>parallelai.spyglass</artifactId>
- <version>${scala.version}_4.0.0-SNAPSHOT</version>
+ <version>${scala.version}_4.1.0</version>
<packaging>jar</packaging>
- <!--
- <distributionManagement>
- <repository>
- <id>conjars</id>
- <url>scp://repo@conjars.org:</url>
- </repository>
- </distributionManagement>
- -->
-
<!-- Repositories -->
<repositories>
<repository>
@@ -180,12 +174,6 @@
<groupId>com.twitter</groupId>
<artifactId>scalding-core_${scalding.scala.version}</artifactId>
<version>${scalding.version}</version>
- <!--<exclusions>-->
- <!--<exclusion>-->
- <!--<groupId>com.twitter</groupId>-->
- <!--<artifactId>maple</artifactId>-->
- <!--</exclusion>-->
- <!--</exclusions>-->
</dependency>
<dependency>
@@ -374,39 +362,15 @@
</lifecycleMappingMetadata>
</configuration>
</plugin>
- <!--
<plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>wagon-maven-plugin</artifactId>
- <version>1.0-beta-3</version>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
<configuration>
- <serverId>conjars</serverId>
- <fromFile>${project.build.directory}/${project.build.finalName}.jar</fromFile>
- <fromFile>${project.build.directory}/pom.xml</fromFile>
- <url>scp://repo@conjars.org/</url>
+ <altDeploymentRepository>repo::default::file:${project.basedir}/releases</altDeploymentRepository>
</configuration>
- <executions>
- <execution>
- <id>upload-war-to-server</id>
- <phase>deploy</phase>
- <goals>
- <goal>deploy</goal>
- </goals>
- </execution>
- </executions>
</plugin>
- -->
</plugins>
</pluginManagement>
- <!--
- <extensions>
- <extension>
- <groupId>org.apache.maven.wagon</groupId>
- <artifactId>wagon-ssh</artifactId>
- <version>1.0</version>
- </extension>
- </extensions>
- -->
</build>
</project>
diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
index d290f06..33d12f7 100644
--- a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
+++ b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
@@ -27,9 +27,9 @@ class HdfsToJdbc (args: Args) extends JobBase(args) {
'key_id, 'col1, 'col2, 'col3
)
- val url = "mysql01.prod.bigdata.bskyb.com"
- val dbName = "skybet_db"
- val tableName = "skybet_hbase_betdetail_jdbc_test"
+ val url = "mysql01.domain"
+ val dbName = "db"
+ val tableName = "table"
val jdbcSource2 = new JDBCSource(
diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
index 765b422..c4e6c84 100644
--- a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
+++ b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
@@ -29,9 +29,9 @@ class JdbcSourceShouldReadWrite (args: Args) extends JobBase(args) {
LOG.info("Setting logging to Level.DEBUG")
}
- val url = "mysql01.prod.bigdata.bskyb.com"
- val dbName = "skybet_db"
- val tableName = "skybet_hbase_betdetail_jdbc_test"
+ val url = "mysql01.domain"
+ val dbName = "db"
+ val tableName = "table"
val jdbcSourceRead = new JDBCSource(
"TABLE_01",