diff options
-rw-r--r-- | scalding/project/plugins.sbt | 1 | ||||
-rw-r--r-- | scalding/scalastyle-config.xml | 117 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseBuilder.scala | 2 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseCountJob.scala | 2 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 4 |
5 files changed, 122 insertions, 4 deletions
diff --git a/scalding/project/plugins.sbt b/scalding/project/plugins.sbt index 084d4bf..a339892 100644 --- a/scalding/project/plugins.sbt +++ b/scalding/project/plugins.sbt @@ -1,2 +1,3 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6") addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0") +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/scalding/scalastyle-config.xml b/scalding/scalastyle-config.xml new file mode 100644 index 0000000..2f20677 --- /dev/null +++ b/scalding/scalastyle-config.xml @@ -0,0 +1,117 @@ +<scalastyle> + <name>Scalastyle standard configuration</name> + <check level="warning" class="org.scalastyle.file.FileTabChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.FileLengthChecker" enabled="true"> + <parameters> + <parameter name="maxFileLength"><![CDATA[800]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.file.HeaderMatchesChecker" enabled="false"> + <parameters> + <parameter name="header"><![CDATA[// Copyright (C) 2011-2012 the original author or authors. +// See the LICENCE.txt file distributed with this work for additional +// information regarding copyright ownership. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.FileLineLengthChecker" enabled="true"> + <parameters> + <parameter name="maxLineLength"><![CDATA[160]]></parameter> + <parameter name="tabSize"><![CDATA[4]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true"> + <parameters> + <parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true"> + <parameters> + <parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true"> + <parameters> + <parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> + <parameters> + <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true"> + <parameters> + <parameter name="maxParameters"><![CDATA[8]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true"> + <parameters> + <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters> + <parameter name="regex"><![CDATA[println]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true"> + <parameters> + <parameter name="maxTypes"><![CDATA[30]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true"> + <parameters> + <parameter name="maximum"><![CDATA[10]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true"> + <parameters> + <parameter name="singleLineAllowed"><![CDATA[true]]></parameter> + <parameter name="doubleLineAllowed"><![CDATA[false]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true"> + <parameters> + <parameter name="maxLength"><![CDATA[50]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true"> + <parameters> + <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true"> + <parameters> + <parameter name="maxMethods"><![CDATA[30]]></parameter> + </parameters> + </check> + <check level="warning" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check> + <check level="warning" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check> +</scalastyle> diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala index 85766d6..b271def 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala @@ -44,7 +44,7 @@ object HBaseBuilder { (families, groupedColNames.map({fields => new Fields(fields : _*)})) } - def build(table: String, server: String, colSpecs: List[String], sourceMode: SourceMode, keyList: List[String] = List("key")) = { + def build(table: String, server: String, colSpecs: List[String], sourceMode: SourceMode, keyList: List[String] = List("key")) : HBaseSource = { val (families, fields) = parseColSpecs(colSpecs) new HBaseSource(table, server, new Fields("key"), families, fields, sourceMode = sourceMode, keyList = keyList) } diff --git a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala index 863c16f..1ebc261 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala @@ -24,7 +24,7 @@ class HBaseCountJob(args: Args, colSpec: String) extends JobBase(args) with HBas } object HBaseCountJob { - def getHBaseSource(hbaseTable: String, zookeeperHosts: String, colSpec: String) = HBaseBuilder.build( + def getHBaseSource(hbaseTable: String, zookeeperHosts: String, colSpec: String) : HBaseSource = HBaseBuilder.build( hbaseTable, // HBase Table Name zookeeperHosts, // HBase Zookeeper server (to get runtime config info; can be array?) List(colSpec), diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala index 334b7a6..ba3b9cd 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala @@ -22,9 +22,9 @@ class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversio object HBaseRowCountJob { - // eg, "wbgrp-journal-extract-0-qa", "mtrcs-zk1.us.archive.org:2181" + // eg, "wbgrp-journal-extract-0-qa",7 "mtrcs-zk1.us.archive.org:2181" def getHBaseSource(hbaseTable: String, zookeeperHosts: String) : HBaseSource = { - return HBaseBuilder.build( + HBaseBuilder.build( hbaseTable, zookeeperHosts, List("file:size"), |