From 98f78c0ef17436f87991169b4a7bedadf602527a Mon Sep 17 00:00:00 2001 From: Ellen Spertus Date: Mon, 27 Aug 2018 18:05:21 -0700 Subject: replaced NoSlug with proper use of Option --- .../src/main/scala/sandcrawler/CrossrefScorable.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'scalding/src/main/scala/sandcrawler/CrossrefScorable.scala') diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala index f51c210..c13945f 100644 --- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala +++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala @@ -17,7 +17,7 @@ class CrossrefScorable extends Scorable with HBasePipeConversions { TextLine(args("crossref-input")) } - def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[MapFeatures] = { + def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[Option[MapFeatures]] = { getSource(args).read .toTypedPipe[String](new Fields("line")) .filter { CrossrefScorable.keepRecord(_) } @@ -116,22 +116,25 @@ object CrossrefScorable { } } - def jsonToMapFeatures(json : String) : MapFeatures = { + def jsonToMapFeatures(json : String) : Option[MapFeatures] = { Scorable.jsonToMap(json) match { - case None => MapFeatures(Scorable.NoSlug, json) + case None => None case Some(map) => mapToTitle(map) match { - case None => MapFeatures(Scorable.NoSlug, json) + case None => None case Some(title) => { val doi = Scorable.getString(map, "DOI") val authors: List[String] = mapToAuthorList(map) val year: Int = mapToYear(map).getOrElse(0) val contentType: String = map.get("type").map(e => e.asInstanceOf[String]).getOrElse("MISSING-CONTENT-TYPE") if (doi.isEmpty || doi == null || authors.length == 0 || !(ContentTypeWhitelist contains contentType)) { - MapFeatures(Scorable.NoSlug, json) + None } else { val sf : ScorableFeatures = ScorableFeatures.create(title=title, authors=authors, doi=doi.toLowerCase(), year=year) - MapFeatures(sf.toSlug, sf.toString) + sf.toSlug match { + case None => None + case Some(slug) => Some(MapFeatures(slug, sf.toString)) + } } } } -- cgit v1.2.3