aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-09 11:38:05 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-09 11:38:05 -0700
commit 25ade249538aade9dcd39d459bacdf43ea0a7dd6 (patch)
tree 218c8a9851fc29902503c2f40b7884f4d9db0c12 /scalding/src/main/scala/sandcrawler/GrobidScorable.scala
parent 6d64c5d4e1527c7277527132efa858def2589486 (diff)
download sandcrawler-25ade249538aade9dcd39d459bacdf43ea0a7dd6.tar.gz
sandcrawler-25ade249538aade9dcd39d459bacdf43ea0a7dd6.zip
Fixed scalastyle violations.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/GrobidScorable.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/GrobidScorable.scala 21
1 files changed, 9 insertions, 12 deletions
diff --git a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
index bf36855..95d6dae 100644
--- a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
@@ -10,7 +10,7 @@ import parallelai.spyglass.hbase.HBasePipeConversions
import parallelai.spyglass.hbase.HBaseSource
class GrobidScorable extends Scorable with HBasePipeConversions {
- def getFeaturesPipe(args : Args)(implicit flowDef : FlowDef, mode : Mode) = {
+ def getFeaturesPipe(args : Args)(implicit flowDef : FlowDef, mode : Mode) : TypedPipe[MapFeatures] = {
// TODO: Clean up code after debugging.
val grobidSource = HBaseBuilder.build(
args("hbase-table"),
@@ -18,21 +18,18 @@ class GrobidScorable extends Scorable with HBasePipeConversions {
List("grobid0:tei_json"),
SourceMode.SCAN_ALL)
-// val pipe0 : Pipe = grobidSource.read
-// val grobidPipe : TypedPipe[MapFeatures] = pipe0
grobidSource.read
- .fromBytesWritable(new Fields("key", "tei_json"))
- // .debug // Should be 4 tuples for mocked data
+ .fromBytesWritable(new Fields("key", "tei_json"))
// TODO: Figure out why this line (used in HBaseCrossrefScoreJob.scala)
// didn't work here: .toTypedPipe[(String, String)]('key, 'tei_json)
- .toTypedPipe[(String, String)](new Fields("key", "tei_json"))
- .map { entry =>
- val (key : String, json : String) = (entry._1, entry._2)
- GrobidScorable.grobidToSlug(json) match {
- case Some(slug) => new MapFeatures(slug, json)
- case None => new MapFeatures(Scorable.NoSlug, json)
+ .toTypedPipe[(String, String)](new Fields("key", "tei_json"))
+ .map { entry =>
+ val (key : String, json : String) = (entry._1, entry._2)
+ GrobidScorable.grobidToSlug(json) match {
+ case Some(slug) => new MapFeatures(slug, json)
+ case None => new MapFeatures(Scorable.NoSlug, json)
+ }
}
- }
}
}