about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-07 09:51:18 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-07 09:51:18 -0700
commit c71b2da70ff7d3b77082db25672f6f3669f2238c (patch)
tree 30f1cad98c781fdd5d1de18ffda232cd4286c72f /scalding/src/main/scala/sandcrawler/GrobidScorable.scala
parent 308b33d889d804380427d2aa112efec77b3e1770 (diff)
download sandcrawler-c71b2da70ff7d3b77082db25672f6f3669f2238c.tar.gz
sandcrawler-c71b2da70ff7d3b77082db25672f6f3669f2238c.zip
Added CrossrefScorable.scala. All code compiles.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/GrobidScorable.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/GrobidScorable.scala 13
1 files changed, 5 insertions, 8 deletions
diff --git a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
index 5dac64c..8da7708 100644
--- a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
@@ -16,8 +16,9 @@ class GrobidScorable extends Scorable with HBasePipeConversions {
args("hbase-table"),
args("zookeeper-hosts"))
- val pipe0 : Pipe = grobidSource.read
- val grobidPipe : TypedPipe[MapFeatures] = pipe0
+// val pipe0 : Pipe = grobidSource.read
+// val grobidPipe : TypedPipe[MapFeatures] = pipe0
+ grobidSource.read
.fromBytesWritable(new Fields("key", "tei_json"))
// .debug // Should be 4 tuples for mocked data
// TODO: Figure out why this line (used in HBaseCrossrefScoreJob.scala)
@@ -26,14 +27,10 @@ class GrobidScorable extends Scorable with HBasePipeConversions {
.map { entry =>
val (key : String, json : String) = (entry._1, entry._2)
HBaseCrossrefScore.grobidToSlug(json) match {
- case Some(slug) => new MapFeatures(slug, key, json)
- case None => new MapFeatures(Scorable.NoSlug, key, json)
+ case Some(slug) => new MapFeatures(slug, json)
+ case None => new MapFeatures(Scorable.NoSlug, json)
}
}
- .filter {
- _.slug != Scorable.NoSlug
- }
- grobidPipe
}
/*
def fromBytesWritableLocal(f: Fields): Pipe = {