-rw-r--r--  README.md                                                                        | 75
-rw-r--r--  pom.xml                                                                          | 56
-rw-r--r--  src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala                 |  6
-rw-r--r--  src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala  |  6
4 files changed, 89 insertions, 54 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -5,6 +5,13 @@
 Cascading and Scalding wrapper for HBase with advanced read and write features.
 Prevent Hot Spotting by the use of transparent key prefixes.
 
+Changes
+=======
+- Added JDBC Tap functionality
+- Added Delete functionality
+- Added Region grouping of splits
+- Migrated the Maven repo to a local subdirectory. See below.
+
 Building
 ========
 
@@ -17,7 +24,7 @@ To use SpyGlass as a dependency use the following repository
     <repositories>
         <repository>
             <id>parallelai-releases</id>
-            <url>https://github.com/ParallelAI/mvn-repo/raw/master/releases</url>
+            <url>https://github.com/ParallelAI/SpyGlass/raw/master/releases</url>
         </repository>
     </repositories>
 
@@ -270,4 +277,68 @@ e.g.
 
 6. Jdbc Tap and Source
 ===========================
-To be added soon.
\ No newline at end of file
+To be added soon.
+
+e.g.
+    val jdbcSourceRead = new JDBCSource(
+      "TABLE_01",
+      "com.mysql.jdbc.Driver",
+      "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
+      "root",
+      "password",
+      List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
+      List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
+      List("id"),
+      new Fields("key", "column1", "column2", "column3"),
+      null, null, null
+    )
+
+    val jdbcSourceWrite = new JDBCSource(
+      "TABLE_01",
+      "com.mysql.jdbc.Driver",
+      "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
+      "root",
+      "password",
+      List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
+      List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
+      List("id"),
+      new Fields("key", "column1", "column2", "column3"),
+      null, null, null
+    )
+
+
+7. HBase Delete Functionality
+=============================
+
+Delete functionality is available from SpyGlass version 4.1.0 onwards. Below is an example of how to use it.
+
+The feature can be enabled by passing the parameter sinkMode = SinkMode.REPLACE to the HBaseSource.
+Use sinkMode = SinkMode.UPDATE to write to the HBase table as usual.
+
+e.g.
+    val eraser = toIBW(input, TABLE_SCHEMA)
+      .write(new HBaseSource( "_TEST.SALT.01", quorum, 'key,
+        TABLE_SCHEMA.tail.map((x: Symbol) => "data"),
+        TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)), sinkMode = SinkMode.REPLACE ))
+
+All rows with a matching key are deleted, including all versions of those rows.
+
+8. Regional Split Group in Scan and Get
+=======================================
+
+This functionality significantly reduces the number of mappers when using hot spot prevention with key prefixes.
+
+The feature can be activated by passing the parameter inputSplitType = SplitType.REGIONAL to the HBaseSource.
+Use inputSplitType = SplitType.GRANULAR to retain the previous behaviour.
+
+e.g.
+    val hbase04 = new HBaseSource( "_TEST.SALT.01", quorum, 'key,
+      TABLE_SCHEMA.tail.map((x: Symbol) => "data"),
+      TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)),
+      sourceMode = SourceMode.SCAN_RANGE, startKey = sttKeyP, stopKey = stpKeyP,
+      inputSplitType = SplitType.REGIONAL).read
+      .fromBytesWritable(TABLE_SCHEMA)
+      .map(('key, 'salted, 'unsalted) -> 'testData) {x: (String, String, String) => List(x._1, x._2, x._3)}
+      .project('testData)
+      .write(TextLine("saltTesting/ScanRangeNoSalt01"))
+      .groupAll(group => group.toList[List[List[String]]]('testData -> 'testData))
\ No newline at end of file
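
Usage note: the README section above shows only the JDBCSource constructors. A minimal sketch of wiring the two taps together in a Scalding job follows; the job class name, the "TABLE_02" target table, and the import paths are illustrative assumptions based on the repository layout, not part of this commit.

    // Hypothetical sketch: copy every row from one JDBC table to another
    // through the new JDBC Tap. Import paths assumed from the repo layout.
    import com.twitter.scalding.{Args, Job}
    import cascading.tuple.Fields
    import parallelai.spyglass.jdbc.JDBCSource

    class JdbcRoundTrip(args: Args) extends Job(args) {

      // Read tap, constructed exactly as in the README example above.
      val jdbcSourceRead = new JDBCSource(
        "TABLE_01",
        "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
        "root",
        "password",
        List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
        List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
        List("id"),
        new Fields("key", "column1", "column2", "column3"),
        null, null, null)

      // Write tap of the same shape; "TABLE_02" is a hypothetical target.
      val jdbcSourceWrite = new JDBCSource(
        "TABLE_02",
        "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull",
        "root",
        "password",
        List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"),
        List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"),
        List("id"),
        new Fields("key", "column1", "column2", "column3"),
        null, null, null)

      // Read from the source tap and write straight to the sink tap.
      jdbcSourceRead.read.write(jdbcSourceWrite)
    }
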
diff --git a/pom.xml b/pom.xml
--- a/pom.xml
+++ b/pom.xml
@@ -41,8 +41,10 @@
         <zookeeper.version>3.4.5-${cdh.version}</zookeeper.version>
 
         <!-- Scala/Scalding/Cascading properties -->
-        <scala.version>2.10.2</scala.version>
-        <scalding.scala.version>2.10</scalding.scala.version>
+        <!-- can be 2.9.3 or 2.10.2 -->
+        <scala.version>2.9.3</scala.version>
+        <!-- 2.10 for Scala 2.10.2 and 2.9.3 for Scala 2.9.3 -->
+        <scalding.scala.version>2.9.3</scalding.scala.version>
         <scalding.version>0.8.6</scalding.version>
         <cascading.version>2.1.6</cascading.version>
         <scalding-commons.version>0.2.0</scalding-commons.version>
@@ -50,7 +52,8 @@
         <trove4j.version>3.0.3</trove4j.version>
         <maple.version>0.2.8</maple.version>
 
-        <specs2.version>2.1.1</specs2.version>
+        <!-- 2.1.1 for Scala 2.10.2 and 1.12.4.1 for Scala 2.9.3 -->
+        <specs2.version>1.12.4.1</specs2.version>
         <typesafe.config.version>1.0.0</typesafe.config.version>
 
         <!-- Other libraries properties -->
@@ -66,18 +69,9 @@
     <name>Cascading and Scalding wrapper for HBase with advanced features</name>
     <groupId>parallelai</groupId>
     <artifactId>parallelai.spyglass</artifactId>
-    <version>${scala.version}_4.0.0-SNAPSHOT</version>
+    <version>${scala.version}_4.1.0</version>
     <packaging>jar</packaging>
 
-    <!--
-    <distributionManagement>
-        <repository>
-            <id>conjars</id>
-            <url>scp://repo@conjars.org:</url>
-        </repository>
-    </distributionManagement>
-    -->
-
     <!-- Repositories -->
     <repositories>
         <repository>
@@ -180,12 +174,6 @@
             <groupId>com.twitter</groupId>
             <artifactId>scalding-core_${scalding.scala.version}</artifactId>
             <version>${scalding.version}</version>
-            <!--<exclusions>-->
-                <!--<exclusion>-->
-                    <!--<groupId>com.twitter</groupId>-->
-                    <!--<artifactId>maple</artifactId>-->
-                <!--</exclusion>-->
-            <!--</exclusions>-->
         </dependency>
 
         <dependency>
@@ -374,39 +362,15 @@
                 </lifecycleMappingMetadata>
             </configuration>
             </plugin>
-            <!--
             <plugin>
-                <groupId>org.codehaus.mojo</groupId>
-                <artifactId>wagon-maven-plugin</artifactId>
-                <version>1.0-beta-3</version>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-deploy-plugin</artifactId>
                 <configuration>
-                    <serverId>conjars</serverId>
-                    <fromFile>${project.build.directory}/${project.build.finalName}.jar</fromFile>
-                    <fromFile>${project.build.directory}/pom.xml</fromFile>
-                    <url>scp://repo@conjars.org/</url>
+                    <altDeploymentRepository>repo::default::file:${project.basedir}/releases</altDeploymentRepository>
                 </configuration>
-                <executions>
-                    <execution>
-                        <id>upload-war-to-server</id>
-                        <phase>deploy</phase>
-                        <goals>
-                            <goal>deploy</goal>
-                        </goals>
-                    </execution>
-                </executions>
             </plugin>
-            -->
         </plugins>
     </pluginManagement>
-    <!--
-    <extensions>
-        <extension>
-            <groupId>org.apache.maven.wagon</groupId>
-            <artifactId>wagon-ssh</artifactId>
-            <version>1.0</version>
-        </extension>
-    </extensions>
-    -->
 </build>
 </project>
diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
index d290f06..33d12f7 100644
--- a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
+++ b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala
@@ -27,9 +27,9 @@ class HdfsToJdbc (args: Args) extends JobBase(args) {
     'key_id, 'col1, 'col2, 'col3
   )
 
-  val url = "mysql01.prod.bigdata.bskyb.com"
-  val dbName = "skybet_db"
-  val tableName = "skybet_hbase_betdetail_jdbc_test"
+  val url = "mysql01.domain"
+  val dbName = "db"
+  val tableName = "table"
 
   val jdbcSource2 = new JDBCSource(
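
The maven-deploy-plugin change above routes mvn deploy into the local releases subdirectory, matching the repository URL the README now points consumers at. For sbt users, an equivalent resolver and dependency might look like the sketch below; the coordinates are read off the pom above, and the 2.9.3_4.1.0 version string follows its ${scala.version}_4.1.0 scheme.

    // build.sbt sketch (an assumption, not part of this commit).
    // Resolver URL taken from the README change above.
    resolvers += "parallelai-releases" at
      "https://github.com/ParallelAI/SpyGlass/raw/master/releases"

    // groupId % artifactId % version, per the pom's new versioning scheme.
    libraryDependencies += "parallelai" % "parallelai.spyglass" % "2.9.3_4.1.0"
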
diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
index 765b422..c4e6c84 100644
--- a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
+++ b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala
@@ -29,9 +29,9 @@ class JdbcSourceShouldReadWrite (args: Args) extends JobBase(args) {
     LOG.info("Setting logging to Level.DEBUG")
   }
 
-  val url = "mysql01.prod.bigdata.bskyb.com"
-  val dbName = "skybet_db"
-  val tableName = "skybet_hbase_betdetail_jdbc_test"
+  val url = "mysql01.domain"
+  val dbName = "db"
+  val tableName = "table"
 
   val jdbcSourceRead = new JDBCSource(
     "TABLE_01",
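
Both test jobs above now carry neutral placeholders (mysql01.domain, db, table) in place of internal hostnames. For reference, here is a sketch of how such values are typically assembled into the connection string that JDBCSource expects, mirroring the README example; the exact composition inside these jobs is an assumption.

    // Sketch (assumption): build a MySQL JDBC URL from the placeholders,
    // in the same shape as the README's connection string.
    val url = "mysql01.domain"
    val dbName = "db"
    val connectionString =
      "jdbc:mysql://" + url + ":3306/" + dbName +
        "?zeroDateTimeBehavior=convertToNull"
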