aboutsummaryrefslogtreecommitdiffstats
path: root/scalding
diff options
context:
space:
mode:
Diffstat (limited to 'scalding')
-rw-r--r--scalding/project/plugins.sbt1
-rw-r--r--scalding/scalastyle-config.xml117
-rw-r--r--scalding/src/main/scala/sandcrawler/HBaseBuilder.scala2
-rw-r--r--scalding/src/main/scala/sandcrawler/HBaseCountJob.scala2
-rw-r--r--scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala4
5 files changed, 122 insertions, 4 deletions
diff --git a/scalding/project/plugins.sbt b/scalding/project/plugins.sbt
index 084d4bf..a339892 100644
--- a/scalding/project/plugins.sbt
+++ b/scalding/project/plugins.sbt
@@ -1,2 +1,3 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0")
+addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")
diff --git a/scalding/scalastyle-config.xml b/scalding/scalastyle-config.xml
new file mode 100644
index 0000000..2f20677
--- /dev/null
+++ b/scalding/scalastyle-config.xml
@@ -0,0 +1,117 @@
+<scalastyle>
+ <name>Scalastyle standard configuration</name>
+ <check level="warning" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.FileLengthChecker" enabled="true">
+ <parameters>
+ <parameter name="maxFileLength"><![CDATA[800]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.file.HeaderMatchesChecker" enabled="false">
+ <parameters>
+ <parameter name="header"><![CDATA[// Copyright (C) 2011-2012 the original author or authors.
+// See the LICENCE.txt file distributed with this work for additional
+// information regarding copyright ownership.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+ <parameters>
+ <parameter name="maxLineLength"><![CDATA[160]]></parameter>
+ <parameter name="tabSize"><![CDATA[4]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
+ <parameters>
+ <parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
+ <parameters>
+ <parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
+ <parameters>
+ <parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true">
+ <parameters>
+ <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
+ <parameters>
+ <parameter name="maxParameters"><![CDATA[8]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true">
+ <parameters>
+ <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.RegexChecker" enabled="true">
+ <parameters>
+ <parameter name="regex"><![CDATA[println]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true">
+ <parameters>
+ <parameter name="maxTypes"><![CDATA[30]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true">
+ <parameters>
+ <parameter name="maximum"><![CDATA[10]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
+ <parameters>
+ <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
+ <parameter name="doubleLineAllowed"><![CDATA[false]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true">
+ <parameters>
+ <parameter name="maxLength"><![CDATA[50]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true">
+ <parameters>
+ <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
+ <parameters>
+ <parameter name="maxMethods"><![CDATA[30]]></parameter>
+ </parameters>
+ </check>
+ <check level="warning" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
+ <check level="warning" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
+</scalastyle>
diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
index 85766d6..b271def 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala
@@ -44,7 +44,7 @@ object HBaseBuilder {
(families, groupedColNames.map({fields => new Fields(fields : _*)}))
}
- def build(table: String, server: String, colSpecs: List[String], sourceMode: SourceMode, keyList: List[String] = List("key")) = {
+ def build(table: String, server: String, colSpecs: List[String], sourceMode: SourceMode, keyList: List[String] = List("key")) : HBaseSource = {
val (families, fields) = parseColSpecs(colSpecs)
new HBaseSource(table, server, new Fields("key"), families, fields, sourceMode = sourceMode, keyList = keyList)
}
diff --git a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala
index 863c16f..1ebc261 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala
@@ -24,7 +24,7 @@ class HBaseCountJob(args: Args, colSpec: String) extends JobBase(args) with HBas
}
object HBaseCountJob {
- def getHBaseSource(hbaseTable: String, zookeeperHosts: String, colSpec: String) = HBaseBuilder.build(
+ def getHBaseSource(hbaseTable: String, zookeeperHosts: String, colSpec: String) : HBaseSource = HBaseBuilder.build(
hbaseTable, // HBase Table Name
zookeeperHosts, // HBase Zookeeper server (to get runtime config info; can be array?)
List(colSpec),
diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
index 334b7a6..ba3b9cd 100644
--- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
+++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala
@@ -22,9 +22,9 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio
object HBaseRowCountJob {
- // eg, "wbgrp-journal-extract-0-qa", "mtrcs-zk1.us.archive.org:2181"
+ // eg, "wbgrp-journal-extract-0-qa",7 "mtrcs-zk1.us.archive.org:2181"
def getHBaseSource(hbaseTable: String, zookeeperHosts: String) : HBaseSource = {
- return HBaseBuilder.build(
+ HBaseBuilder.build(
hbaseTable,
zookeeperHosts,
List("file:size"),