diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-07-19 15:21:33 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-07-19 15:21:33 -0700 |
commit | 3c2488978562b34cf60e51546cc71b0cbfbe5eaf (patch) | |
tree | 6a2c4d88fd910f040248058667c06ab4b57e1ec6 /scalding | |
parent | a02a10d98759015ee200a456ad13b6143cd7fde6 (diff) | |
download | sandcrawler-3c2488978562b34cf60e51546cc71b0cbfbe5eaf.tar.gz sandcrawler-3c2488978562b34cf60e51546cc71b0cbfbe5eaf.zip |
Improved style and style checking.
- Excludes checking of files in /example directories.
- Warns about block imports (e.g. `import a.{B, C}`); the existing block imports have been replaced with one import per line.
- Checks indentation. Method parameters should be indented 2 spaces and class parameters 4 spaces. See https://docs.scala-lang.org/style/indentation.html#methods-with-numerous-arguments
- Imports should be grouped (java.*/javax.*, scala.*, other), with a blank line between groups.
Diffstat (limited to 'scalding')
-rw-r--r-- | scalding/build.sbt | 6 | ||||
-rw-r--r-- | scalding/scalastyle-config.xml | 26 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseBuilder.scala | 4 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseCountJob.scala | 13 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala | 12 | ||||
-rw-r--r-- | scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala | 18 |
6 files changed, 54 insertions, 25 deletions
diff --git a/scalding/build.sbt b/scalding/build.sbt index f333111..ba2a825 100644 --- a/scalding/build.sbt +++ b/scalding/build.sbt @@ -13,6 +13,12 @@ lazy val root = (project in file(".")). test in assembly := {}, )), + (scalastyleSources in Compile) := { + // all .scala files in "src/main/scala" + val scalaSourceFiles = ((scalaSource in Compile).value ** "*.scala").get + val dirNameToExclude = "/example/" + scalaSourceFiles.filterNot(_.getAbsolutePath.contains(dirNameToExclude)) + }, name := "sandcrawler", resolvers += "conjars.org" at "http://conjars.org/repo", diff --git a/scalding/scalastyle-config.xml b/scalding/scalastyle-config.xml index 2f20677..23ec993 100644 --- a/scalding/scalastyle-config.xml +++ b/scalding/scalastyle-config.xml @@ -6,6 +6,13 @@ <parameter name="maxFileLength"><![CDATA[800]]></parameter> </parameters> </check> +<check enabled="true" class="org.scalastyle.file.IndentationChecker" level="warning"> + <parameters> + <parameter name="tabSize">2</parameter> + <parameter name="methodParamIndentSize">2</parameter> + <parameter name="classParamIndentSize">4</parameter> + </parameters> +</check> <check level="warning" class="org.scalastyle.file.HeaderMatchesChecker" enabled="false"> <parameters> <parameter name="header"><![CDATA[// Copyright (C) 2011-2012 the original author or authors. 
@@ -50,11 +57,6 @@ </parameters> </check> <check level="warning" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check> - <check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> - <parameters> - <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> - </parameters> - </check> <check level="warning" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true"> <parameters> <parameter name="maxParameters"><![CDATA[8]]></parameter> @@ -114,4 +116,18 @@ <check level="warning" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check> <check level="warning" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check> <check level="warning" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check> + <check enabled="true" class="org.scalastyle.scalariform.BlockImportChecker" level="warning"/> + <check enabled="true" class="org.scalastyle.scalariform.ImportOrderChecker" level="warning"> + <parameters> + <parameter name="groups">java,scala,others</parameter> + <parameter name="group.java">javax?\..+</parameter> + <parameter name="group.scala">scala\..+</parameter> + <parameter name="group.others">.+</parameter> + </parameters> +</check> + <check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> + <parameters> + <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> + </parameters> + </check> </scalastyle> diff --git a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala index b271def..b4ade24 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseBuilder.scala @@ -39,8 +39,8 @@ object HBaseBuilder { val groupMap: Map[String, List[String]] = colSpecs.groupBy(c => (c split ":")(0)) val families = groupMap.keys.toList val groupedColNames : 
List[List[String]] = families map {fam => { - val cols = {groupMap(fam).map(v => v.split(":")(1))} - cols}} + val cols = {groupMap(fam).map(v => v.split(":")(1))} + cols}} (families, groupedColNames.map({fields => new Fields(fields : _*)})) } diff --git a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala index 1ebc261..b12e723 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseCountJob.scala @@ -1,21 +1,24 @@ package sandcrawler +import java.util.Properties + import cascading.property.AppProps import cascading.tuple.Fields import com.twitter.scalding._ -import java.util.Properties import parallelai.spyglass.base.JobBase -import parallelai.spyglass.hbase.{HBaseSource, HBasePipeConversions} import parallelai.spyglass.hbase.HBaseConstants.SourceMode +import parallelai.spyglass.hbase.HBasePipeConversions +import parallelai.spyglass.hbase.HBaseSource class HBaseCountJob(args: Args, colSpec: String) extends JobBase(args) with HBasePipeConversions { val output = args("output") HBaseBuilder.parseColSpec(colSpec) val Col: String = colSpec.split(":")(1) - HBaseCountJob.getHBaseSource(args("hbase-table"), - args("zookeeper-hosts"), - colSpec) + HBaseCountJob.getHBaseSource( + args("hbase-table"), + args("zookeeper-hosts"), + colSpec) .read .fromBytesWritable(Symbol(Col)) .debug diff --git a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala index ba3b9cd..4c3de33 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseRowCountJob.scala @@ -1,19 +1,22 @@ package sandcrawler +import java.util.Properties + import cascading.property.AppProps import cascading.tuple.Fields import com.twitter.scalding._ -import java.util.Properties import parallelai.spyglass.base.JobBase -import parallelai.spyglass.hbase.{HBaseSource, 
HBasePipeConversions} import parallelai.spyglass.hbase.HBaseConstants.SourceMode +import parallelai.spyglass.hbase.HBasePipeConversions +import parallelai.spyglass.hbase.HBaseSource class HBaseRowCountJob(args: Args) extends JobBase(args) with HBasePipeConversions { val output = args("output") - HBaseRowCountJob.getHBaseSource(args("hbase-table"), - args("zookeeper-hosts")) + HBaseRowCountJob.getHBaseSource( + args("hbase-table"), + args("zookeeper-hosts")) .read .debug .groupAll { _.size('count) } @@ -30,5 +33,4 @@ object HBaseRowCountJob { List("file:size"), SourceMode.SCAN_ALL) } - } diff --git a/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala b/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala index 375d155..befb037 100644 --- a/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala +++ b/scalding/src/main/scala/sandcrawler/HBaseStatusCountJob.scala @@ -1,16 +1,17 @@ package sandcrawler +import java.util.Properties + import cascading.property.AppProps import cascading.tuple.Fields import com.twitter.scalding._ import com.twitter.scalding.typed.TDsl._ -import java.util.Properties -import parallelai.spyglass.base.JobBase +import org.apache.hadoop.hbase.io.ImmutableBytesWritable import org.apache.hadoop.hbase.util.Bytes -import parallelai.spyglass.hbase.{HBaseSource, HBasePipeConversions} +import parallelai.spyglass.base.JobBase +import parallelai.spyglass.hbase.HBasePipeConversions import parallelai.spyglass.hbase.HBaseConstants.SourceMode -import com.twitter.scalding.Args -import org.apache.hadoop.hbase.io.ImmutableBytesWritable +import parallelai.spyglass.hbase.HBaseSource class HBaseStatusCountJob(args: Args) extends JobBase(args) with HBasePipeConversions { @@ -19,9 +20,10 @@ class HBaseStatusCountJob(args: Args) extends JobBase(args) with HBasePipeConver HBaseBuilder.parseColSpec(colSpec) val Col: String = colSpec.split(":")(1) - val source : TypedPipe[Long] = HBaseCountJob.getHBaseSource(args("hbase-table"), - 
args("zookeeper-hosts"), - colSpec) + val source : TypedPipe[Long] = HBaseCountJob.getHBaseSource( + args("hbase-table"), + args("zookeeper-hosts"), + colSpec) .read .toTypedPipe[(ImmutableBytesWritable,ImmutableBytesWritable)]('key, 'status_code) .map { case (key, raw_code) => Bytes.toLong(raw_code.copyBytes()) } |