blob: 9a8d701e9f4f52c7a66f038a1d873c0af76e2231 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
package sandcrawler
import cascading.pipe.Pipe
import com.twitter.scalding.Args
import com.twitter.scalding.TypedPipe
import com.twitter.scalding.TypedTsv
import parallelai.spyglass.base.JobBase
/**
 * Job that dumps the input pipe of a [[GrobidScorable]] as a two-column TSV.
 *
 * Each `(slug, features)` record from the scorable's input pipe is written as
 * a `(slug, features.json)` row to the location named by the `output` argument.
 *
 * @param args job arguments; forwarded to the scorable when building its input
 *             pipe, and must contain an `output` entry naming the TSV sink
 */
class GrobidScorableDumpJob(args: Args) extends JobBase(args) {
  // The scorable whose input records are dumped.
  val sc1: Scorable = new GrobidScorable()

  // Keyed records as produced by the scorable for these job arguments.
  val pipe1: TypedPipe[(String, ReduceFeatures)] = sc1.getInputPipe(args)

  // Keep the key as-is and replace the features object by its `json` string,
  // then write the resulting pairs out as tab-separated values.
  pipe1
    .map { entry => (entry._1, entry._2.json) }
    .write(TypedTsv[(String, String)](args("output")))
}
|