aboutsummaryrefslogtreecommitdiffstats
path: root/scalding
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2018-07-24 11:35:14 -0700
committer: Bryan Newbold <bnewbold@archive.org>, 2018-07-24 11:35:14 -0700
commit: 7802970c3d42cd3872ff0a0e8d0ffbbbae56ff80 (patch)
tree: 26e640fd1c7eb72e94062a54f890d84196c60fa1 /scalding
parent: b235a60a7a9e701997d3e9f3673538cf294d8b5f (diff)
download: sandcrawler-7802970c3d42cd3872ff0a0e8d0ffbbbae56ff80.tar.gz
sandcrawler-7802970c3d42cd3872ff0a0e8d0ffbbbae56ff80.zip
CdxBackfillJob: comment cleanup
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala | 6
1 files changed, 0 insertions, 6 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala b/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala
index 03db3cf..eb168ac 100644
--- a/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala
+++ b/scalding/src/main/scala/sandcrawler/CdxBackfillJob.scala
@@ -1,7 +1,5 @@
package sandcrawler
-// TODO: fix import order to satisfy scala style
-
import java.util.Properties
import scala.util.Try
@@ -30,9 +28,6 @@ case class CdxLine(surt: String, datetime: String, url: String, mime: String, ht
* 5. filter to only those with null HBase key column
* 6. convert CDX fields to HBase columns
* 7. sink results to HBase
- *
- * TODO: I really mixed the Scalding "field-base" and "type-based" APIs here.
- * Should decide on a best practice.
*/
class CdxBackfillJob(args: Args) extends JobBase(args) with HBasePipeConversions {
@@ -107,7 +102,6 @@ object CdxBackfillJob {
"application/xml" -> "text/xml"
)
- // TODO: improvement of control flow
val lower = raw.toLowerCase()
normalMime.foreach { case (key, value) =>
if (lower.startsWith(key)) {