diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-13 09:58:27 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-13 09:58:27 -0700 |
commit | 1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb (patch) | |
tree | bf18ec3b4335403fc7f2a4ed9b9379e9cbf25634 /scalding/src/main/scala/sandcrawler/CrossrefScorable.scala | |
parent | 5615428921a45ba6a2fb005b255a28dcbb83b13f (diff) | |
download | sandcrawler-1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb.tar.gz sandcrawler-1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb.zip |
Pipeline works, all tests pass, no scalastyle errors.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/CrossrefScorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/CrossrefScorable.scala | 28 |
1 file changed, 3 insertions, 25 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index e257152..4558ee6 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -1,36 +1,14 @@
 package sandcrawler
 
-import cascading.flow.FlowDef
-import cascading.pipe.Pipe
-import cascading.tuple.Fields
-import com.twitter.scalding._
-import com.twitter.scalding.typed.TDsl._
-import parallelai.spyglass.hbase.HBaseConstants.SourceMode
-import parallelai.spyglass.hbase.HBasePipeConversions
-import parallelai.spyglass.hbase.HBaseSource
-import TDsl._
-import scala.util.parsing.json.JSONObject
-
-import java.text.Normalizer
-import java.util.Arrays
-import java.util.Properties
-import java.util.regex.Pattern
-
 import scala.math
 import scala.util.parsing.json.JSON
 import scala.util.parsing.json.JSONObject
 
+import cascading.flow.FlowDef
 import cascading.tuple.Fields
 import com.twitter.scalding._
-import com.twitter.scalding.typed.CoGrouped
-import com.twitter.scalding.typed.Grouped
 import com.twitter.scalding.typed.TDsl._
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable
-import org.apache.hadoop.hbase.util.Bytes
-import parallelai.spyglass.base.JobBase
-import parallelai.spyglass.hbase.HBaseConstants.SourceMode
 import parallelai.spyglass.hbase.HBasePipeConversions
-import parallelai.spyglass.hbase.HBaseSource
 
 class CrossrefScorable extends Scorable with HBasePipeConversions {
   // TODO: Generalize args so there can be multiple Crossref pipes in one job.
@@ -50,8 +28,8 @@ object CrossrefScorable {
     Scorable.jsonToMap(json) match {
       case None => MapFeatures(Scorable.NoSlug, json)
       case Some(map) => {
-        if ((map contains "titles") && (map contains "DOI")) {
-          val titles = map("titles").asInstanceOf[List[String]]
+        if ((map contains "title") && (map contains "DOI")) {
+          val titles = map("title").asInstanceOf[List[String]]
           val doi = Scorable.getString(map, "DOI")
           if (titles.isEmpty || titles == null || doi.isEmpty || doi == null) {
             new MapFeatures(Scorable.NoSlug, json)
```