aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-13 09:58:27 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-13 09:58:27 -0700
commit 1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb (patch)
tree bf18ec3b4335403fc7f2a4ed9b9379e9cbf25634 /scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
parent 5615428921a45ba6a2fb005b255a28dcbb83b13f (diff)
download sandcrawler-1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb.tar.gz
sandcrawler-1c6e1234974d8b6e4480a13ff5c4ff861c6d1deb.zip
Pipeline works, all tests pass, no scalastyle errors.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/CrossrefScorable.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/CrossrefScorable.scala | 28
1 files changed, 3 insertions, 25 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index e257152..4558ee6 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -1,36 +1,14 @@
package sandcrawler
-import cascading.flow.FlowDef
-import cascading.pipe.Pipe
-import cascading.tuple.Fields
-import com.twitter.scalding._
-import com.twitter.scalding.typed.TDsl._
-import parallelai.spyglass.hbase.HBaseConstants.SourceMode
-import parallelai.spyglass.hbase.HBasePipeConversions
-import parallelai.spyglass.hbase.HBaseSource
-import TDsl._
-import scala.util.parsing.json.JSONObject
-
-import java.text.Normalizer
-import java.util.Arrays
-import java.util.Properties
-import java.util.regex.Pattern
-
import scala.math
import scala.util.parsing.json.JSON
import scala.util.parsing.json.JSONObject
+import cascading.flow.FlowDef
import cascading.tuple.Fields
import com.twitter.scalding._
-import com.twitter.scalding.typed.CoGrouped
-import com.twitter.scalding.typed.Grouped
import com.twitter.scalding.typed.TDsl._
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable
-import org.apache.hadoop.hbase.util.Bytes
-import parallelai.spyglass.base.JobBase
-import parallelai.spyglass.hbase.HBaseConstants.SourceMode
import parallelai.spyglass.hbase.HBasePipeConversions
-import parallelai.spyglass.hbase.HBaseSource
class CrossrefScorable extends Scorable with HBasePipeConversions {
// TODO: Generalize args so there can be multiple Crossref pipes in one job.
@@ -50,8 +28,8 @@ object CrossrefScorable {
Scorable.jsonToMap(json) match {
case None => MapFeatures(Scorable.NoSlug, json)
case Some(map) => {
- if ((map contains "titles") && (map contains "DOI")) {
- val titles = map("titles").asInstanceOf[List[String]]
+ if ((map contains "title") && (map contains "DOI")) {
+ val titles = map("title").asInstanceOf[List[String]]
val doi = Scorable.getString(map, "DOI")
if (titles.isEmpty || titles == null || doi.isEmpty || doi == null) {
new MapFeatures(Scorable.NoSlug, json)