From c71b2da70ff7d3b77082db25672f6f3669f2238c Mon Sep 17 00:00:00 2001 From: Ellen Spertus Date: Tue, 7 Aug 2018 09:51:18 -0700 Subject: Added CrossrefScorable.scala. All code compiles. --- scalding/src/main/scala/sandcrawler/GrobidScorable.scala | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'scalding/src/main/scala/sandcrawler/GrobidScorable.scala') diff --git a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala index 5dac64c..8da7708 100644 --- a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala +++ b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala @@ -16,8 +16,9 @@ class GrobidScorable extends Scorable with HBasePipeConversions { args("hbase-table"), args("zookeeper-hosts")) - val pipe0 : Pipe = grobidSource.read - val grobidPipe : TypedPipe[MapFeatures] = pipe0 +// val pipe0 : Pipe = grobidSource.read +// val grobidPipe : TypedPipe[MapFeatures] = pipe0 + grobidSource.read .fromBytesWritable(new Fields("key", "tei_json")) // .debug // Should be 4 tuples for mocked data // TODO: Figure out why this line (used in HBaseCrossrefScoreJob.scala) @@ -26,14 +27,10 @@ class GrobidScorable extends Scorable with HBasePipeConversions { .map { entry => val (key : String, json : String) = (entry._1, entry._2) HBaseCrossrefScore.grobidToSlug(json) match { - case Some(slug) => new MapFeatures(slug, key, json) - case None => new MapFeatures(Scorable.NoSlug, key, json) + case Some(slug) => new MapFeatures(slug, json) + case None => new MapFeatures(Scorable.NoSlug, json) } } - .filter { - _.slug != Scorable.NoSlug - } - grobidPipe } /* def fromBytesWritableLocal(f: Fields): Pipe = { -- cgit v1.2.3