diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-07 09:51:18 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-07 09:51:18 -0700 |
commit | c71b2da70ff7d3b77082db25672f6f3669f2238c (patch) | |
tree | 30f1cad98c781fdd5d1de18ffda232cd4286c72f /scalding/src/main/scala/sandcrawler/GrobidScorable.scala | |
parent | 308b33d889d804380427d2aa112efec77b3e1770 (diff) | |
download | sandcrawler-c71b2da70ff7d3b77082db25672f6f3669f2238c.tar.gz sandcrawler-c71b2da70ff7d3b77082db25672f6f3669f2238c.zip |
Added CrossrefScorable.scala. All code compiles.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/GrobidScorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/GrobidScorable.scala | 13 |
1 files changed, 5 insertions, 8 deletions
diff --git a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala index 5dac64c..8da7708 100644 --- a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala +++ b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala @@ -16,8 +16,9 @@ class GrobidScorable extends Scorable with HBasePipeConversions { args("hbase-table"), args("zookeeper-hosts")) - val pipe0 : Pipe = grobidSource.read - val grobidPipe : TypedPipe[MapFeatures] = pipe0 +// val pipe0 : Pipe = grobidSource.read +// val grobidPipe : TypedPipe[MapFeatures] = pipe0 + grobidSource.read .fromBytesWritable(new Fields("key", "tei_json")) // .debug // Should be 4 tuples for mocked data // TODO: Figure out why this line (used in HBaseCrossrefScoreJob.scala) @@ -26,14 +27,10 @@ class GrobidScorable extends Scorable with HBasePipeConversions { .map { entry => val (key : String, json : String) = (entry._1, entry._2) HBaseCrossrefScore.grobidToSlug(json) match { - case Some(slug) => new MapFeatures(slug, key, json) - case None => new MapFeatures(Scorable.NoSlug, key, json) + case Some(slug) => new MapFeatures(slug, json) + case None => new MapFeatures(Scorable.NoSlug, json) } } - .filter { - _.slug != Scorable.NoSlug - } - grobidPipe } /* def fromBytesWritableLocal(f: Fields): Pipe = { |