diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-07 12:06:40 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-07 12:06:40 -0700 |
commit | ccfeb71ef2a25a479c083051acc0ebb7436e421b (patch) | |
tree | 71f4253dffab556154bf343113ba8b689d0b26ac /scalding/src/main/scala/sandcrawler/GrobidScorable.scala | |
parent | 1fa5352742e3b96993cc325e3055b93d79a66571 (diff) | |
download | sandcrawler-ccfeb71ef2a25a479c083051acc0ebb7436e421b.tar.gz sandcrawler-ccfeb71ef2a25a479c083051acc0ebb7436e421b.zip |
Removed HBaseCrossrefScore{Job,Test} and references thereto.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/GrobidScorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/GrobidScorable.scala | 8 |
1 file changed, 5 insertions, 3 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
index 25e5985..bf36855 100644
--- a/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/GrobidScorable.scala
@@ -12,9 +12,11 @@ import parallelai.spyglass.hbase.HBaseSource
 class GrobidScorable extends Scorable with HBasePipeConversions {
   def getFeaturesPipe(args : Args)(implicit flowDef : FlowDef, mode : Mode) = {
     // TODO: Clean up code after debugging.
-    val grobidSource = HBaseCrossrefScore.getHBaseSource(
+    val grobidSource = HBaseBuilder.build(
       args("hbase-table"),
-      args("zookeeper-hosts"))
+      args("zookeeper-hosts"),
+      List("grobid0:tei_json"),
+      SourceMode.SCAN_ALL)
 
     // val pipe0 : Pipe = grobidSource.read
     // val grobidPipe : TypedPipe[MapFeatures] = pipe0
@@ -26,7 +28,7 @@ class GrobidScorable extends Scorable with HBasePipeConversions {
       .toTypedPipe[(String, String)](new Fields("key", "tei_json"))
       .map { entry =>
         val (key : String, json : String) = (entry._1, entry._2)
-        HBaseCrossrefScore.grobidToSlug(json) match {
+        GrobidScorable.grobidToSlug(json) match {
           case Some(slug) => new MapFeatures(slug, json)
           case None => new MapFeatures(Scorable.NoSlug, json)
         }
```