diff options
Diffstat (limited to 'scalding/src')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala | 6 |
1 file changed, 0 insertions, 6 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala b/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala index 03db3cf..eb168ac 100644 --- a/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala +++ b/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala @@ -1,7 +1,5 @@ package sandcrawler -// TODO: fix import order to satisfy scala style - import java.util.Properties import scala.util.Try @@ -30,9 +28,6 @@ case class CdxLine(surt: String, datetime: String, url: String, mime: String, ht * 5. filter to only those with null HBase key column * 6. convert CDX fields to HBase columns * 7. sink results to HBase - * - * TODO: I really mixed the Scalding "field-base" and "type-based" APIs here. - * Should decide on a best practice. */ class CdxBackfillJob(args: Args) extends JobBase(args) with HBasePipeConversions { @@ -107,7 +102,6 @@ object CdxBackfillJob { "application/xml" -> "text/xml" ) - // TODO: improvement of control flow val lower = raw.toLowerCase() normalMime.foreach { case (key, value) => if (lower.startsWith(key)) { |