diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/FatcatScorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/FatcatScorable.scala | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/FatcatScorable.scala b/scalding/src/main/scala/sandcrawler/FatcatScorable.scala index cffc2c0..2090e84 100644 --- a/scalding/src/main/scala/sandcrawler/FatcatScorable.scala +++ b/scalding/src/main/scala/sandcrawler/FatcatScorable.scala @@ -11,6 +11,21 @@ import com.twitter.scalding._ import com.twitter.scalding.typed.TDsl._ import parallelai.spyglass.hbase.HBasePipeConversions + +/** Scorable whose input is a text-line source located by the fatcat-release-input-right job argument, instead of the source used by FatcatScorable. It reuses the record filter and feature extraction helpers of the FatcatScorable companion object. */ class FatcatScorableRight extends Scorable { + + /** Builds a TextLine source from the value of the fatcat-release-input-right argument; args(...) is applied directly, so the argument is presumably required (confirm how Args behaves when it is missing). */ def getSource(args : Args) : Source = { + TextLine(args("fatcat-release-input-right")) + } + + /** Reads the source from getSource, takes the line field as a TypedPipe[String], keeps only lines accepted by FatcatScorable.keepRecord, and maps each remaining line through FatcatScorable.jsonToMapFeatures. NOTE(review): the helper names suggest one JSON release record per line, with None for unusable records -- confirm against the FatcatScorable object. */ def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[Option[MapFeatures]] = { + getSource(args).read + .toTypedPipe[String](new Fields("line")) + .filter { FatcatScorable.keepRecord(_) } + .map { FatcatScorable.jsonToMapFeatures(_) } + } +} + class FatcatScorable extends Scorable with HBasePipeConversions { def getSource(args : Args) : Source = { |