about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/main/scala/sandcrawler/ScoreJob.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScoreJob.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/ScoreJob.scala 51
1 file changed, 7 insertions, 44 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScoreJob.scala b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
index 386b367..75d45e9 100644
--- a/scalding/src/main/scala/sandcrawler/ScoreJob.scala
+++ b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
@@ -1,16 +1,12 @@
package sandcrawler
-import cascading.flow.FlowDef
-import cascading.tuple.Fields
-import com.twitter.scalding.{Args,Source,TextLine,TypedPipe, TypedTsv}
-//import com.twitter.scalding.source.TypedText
-import parallelai.spyglass.base.JobBase
-import parallelai.spyglass.hbase.HBasePipeConversions
-import parallelai.spyglass.hbase.HBaseSource
-import com.twitter.scalding.{ Dsl, RichPipe, IterableSource, TupleSetter, TupleConverter }
import cascading.pipe.Pipe
+import com.twitter.scalding.Args
+import com.twitter.scalding.TypedPipe
+import com.twitter.scalding.TypedTsv
+import parallelai.spyglass.base.JobBase
-class ScoreJob(args: Args) extends JobBase(args) { //with HBasePipeConversions {
+class ScoreJob(args: Args) extends JobBase(args) {
// TODO: Instantiate any subclass of Scorable specified in args.
val sc1 : Scorable = new GrobidScorable()
val sc2 : Scorable = new CrossrefScorable()
@@ -27,10 +23,10 @@ class ScoreJob(args: Args) extends JobBase(args) { //with HBasePipeConversions {
}
//TypedTsv doesn't work over case classes.
.map { entry => (entry.slug, entry.score, entry.json1, entry.json2) }
-
.write(TypedTsv[(String, Int, String, String)](args("output")))
}
+/*
// Ugly hack to get non-String information into ScoreJob above.
object ScoreJob {
var scorable1 : Option[Scorable] = None
@@ -57,38 +53,5 @@ object ScoreJob {
case None => null
}
}
-
- /*
- implicit def sourceToRichPipe(src: Source): RichPipe = new RichPipe(src.read)
-
- // This converts an Iterable into a Pipe or RichPipe with index (int-based) fields
- implicit def toPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): Pipe =
- IterableSource[T](iter)(set, conv).read
-
- implicit def iterableToRichPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): RichPipe =
- RichPipe(toPipe(iter)(set, conv))
-
- // Provide args as an implicit val for extensions such as the Checkpoint extension.
-// implicit protected def _implicitJobArgs: Args = args
-
- def getFeaturesPipe1(pipe : cascading.pipe.Pipe) : TypedPipe[String] = {
- pipe
- // The next line gives an error: value toTypedPipe is not a member of cascading.pipe.Pipe
- .toTypedPipe[String](new Fields("line"))
- }
-
- def getFeaturesPipe(pipe : cascading.pipe.Pipe) : TypedPipe[MapFeatures] = {
- pipe
- .fromBytesWritable(new Fields("key", "tei_json"))
- // I needed to change symbols to strings when I pulled this out of ScoreJob.
- .toTypedPipe[(String, String)](new Fields("key", "tei_json"))
- .map { entry =>
- val (key : String, json : String) = (entry._1, entry._2)
- GrobidScorable.grobidToSlug(json) match {
- case Some(slug) => new MapFeatures(slug, json)
- case None => new MapFeatures(Scorable.NoSlug, json)
- }
- }
- }
- */
}
+ */