From cefd1a9332d674f0ccf69ab1c7bdc102c2a48c92 Mon Sep 17 00:00:00 2001 From: cra14 Date: Wed, 4 Sep 2013 14:00:44 +0100 Subject: Moved maven repo locally Added documentation. Scala 2.9.3 and 2.10.2 can be toggled --- README.md | 75 +++++++++++++++++++++- pom.xml | 56 +++------------- .../spyglass/jdbc/testing/HdfsToJdbc.scala | 6 +- .../jdbc/testing/JdbcSourceShouldReadWrite.scala | 6 +- 4 files changed, 89 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index beaa4a4..6df2fb8 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,13 @@ Cascading and Scalding wrapper for HBase with advanced read and write features. Prevent Hot Spotting by the use of transparent key prefixes. +Changes +======= +- Added JDBC Tap Functionality +- Added Delete Functionality +- Added Region grouping of splits +- Migrated maven repo to local subdirectory. See below. + Building ======== @@ -17,7 +24,7 @@ To use SpyGlass as a dependency use the following repository parallelai-releases - https://github.com/ParallelAI/mvn-repo/raw/master/releases + https://github.com/ParallelAI/SpyGlass/raw/master/releases @@ -270,4 +277,68 @@ e.g. 6. Jdbc Tap and Source =========================== -To be added soon. \ No newline at end of file +To be added soon. + +e.g. + val jdbcSourceRead = new JDBCSource( + "TABLE_01", + "com.mysql.jdbc.Driver", + "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull", + "root", + "password", + List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"), + List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"), + List("id"), + new Fields("key", "column1", "column2", "column3"), + null, null, null + ) + + val jdbcSourceWrite = new JDBCSource( + "TABLE_01", + "com.mysql.jdbc.Driver", + "jdbc:mysql://localhost:3306/sky_db?zeroDateTimeBehavior=convertToNull", + "root", + "password", + List("ID", "TEST_COLUMN1", "TEST_COLUMN2", "TEST_COLUMN3"), + List("bigint(20)", "varchar(45)", "varchar(45)", "bigint(20)"), + List("id"), + new Fields("key", "column1", "column2", "column3"), + null, null, null + ) + + +7. HBase Delete Functionality +============================= + +Delete functionality has been added to SpyGlass version 4.1.0 onwards. Below is an example of how to use it. + +The feature can be enable by using the parameter sinkMode = SinkMode.REPLACE in the HBaseSource +You can use sinkMode = SinkMode.UPDATE to add to the HBaseTable as usual. + +e.g. + val eraser = toIBW(input, TABLE_SCHEMA) + .write(new HBaseSource( "_TEST.SALT.01", quorum, 'key, + TABLE_SCHEMA.tail.map((x: Symbol) => "data"), + TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)), sinkMode = SinkMode.REPLACE )) + +All rows with the key will be deleted. This includes all versions too. + +8. Regional Split Group in Scan and Get +======================================= + +This functionality reduces the number of mappers significantly when using hot spot prevention with prefixes. + +This feature can be activated by using inputSplitType = SplitType.REGIONAL +You can use inputSplitType = SplitType.GRANULAR to use the previous functionality as is. + +e.g. + val hbase04 = new HBaseSource( "_TEST.SALT.01", quorum, 'key, + TABLE_SCHEMA.tail.map((x: Symbol) => "data"), + TABLE_SCHEMA.tail.map((x: Symbol) => new Fields(x.name)), + sourceMode = SourceMode.SCAN_RANGE, startKey = sttKeyP, stopKey = stpKeyP, + inputSplitType = splitType).read + .fromBytesWritable(TABLE_SCHEMA ) + .map(('key, 'salted, 'unsalted) -> 'testData) {x: (String, String, String) => List(x._1, x._2, x._3)} + .project('testData) + .write(TextLine("saltTesting/ScanRangeNoSalt01")) + .groupAll(group => group.toList[List[List[String]]]('testData -> 'testData)) \ No newline at end of file diff --git a/pom.xml b/pom.xml index 066632c..c1c6ebc 100644 --- a/pom.xml +++ b/pom.xml @@ -41,8 +41,10 @@ 3.4.5-${cdh.version} - 2.10.2 - 2.10 + + 2.9.3 + + 2.9.3 0.8.6 2.1.6 0.2.0 @@ -50,7 +52,8 @@ 3.0.3 0.2.8 - 2.1.1 + + 1.12.4.1 1.0.0 @@ -66,18 +69,9 @@ Cascading and Scalding wrapper for HBase with advanced features parallelai parallelai.spyglass - ${scala.version}_4.0.0-SNAPSHOT + ${scala.version}_4.1.0 jar - - @@ -180,12 +174,6 @@ com.twitter scalding-core_${scalding.scala.version} ${scalding.version} - - - - - - @@ -374,39 +362,15 @@ - - diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala index d290f06..33d12f7 100644 --- a/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala +++ b/src/main/scala/parallelai/spyglass/jdbc/testing/HdfsToJdbc.scala @@ -27,9 +27,9 @@ class HdfsToJdbc (args: Args) extends JobBase(args) { 'key_id, 'col1, 'col2, 'col3 ) - val url = "mysql01.prod.bigdata.bskyb.com" - val dbName = "skybet_db" - val tableName = "skybet_hbase_betdetail_jdbc_test" + val url = "mysql01.domain" + val dbName = "db" + val tableName = "table" val jdbcSource2 = new JDBCSource( diff --git a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala index 765b422..c4e6c84 100644 --- a/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala +++ b/src/main/scala/parallelai/spyglass/jdbc/testing/JdbcSourceShouldReadWrite.scala @@ -29,9 +29,9 @@ class JdbcSourceShouldReadWrite (args: Args) extends JobBase(args) { LOG.info("Setting logging to Level.DEBUG") } - val url = "mysql01.prod.bigdata.bskyb.com" - val dbName = "skybet_db" - val tableName = "skybet_hbase_betdetail_jdbc_test" + val url = "mysql01.domain" + val dbName = "db" + val tableName = "table" val jdbcSourceRead = new JDBCSource( "TABLE_01", -- cgit v1.2.3