aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScorableFeatures.scala')
-rw-r--r--scalding/src/main/scala/sandcrawler/ScorableFeatures.scala21
1 files changed, 14 insertions, 7 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala
index 241db79..be2b495 100644
--- a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala
+++ b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala
@@ -35,20 +35,27 @@ class ScorableFeatures private(title : String, authors : List[Any] = List(), yea
JSONObject(toMap).toString
}
- def toSlug() : String = {
+ def toSlug() : Option[String] = {
if (title == null) {
- Scorable.NoSlug
+ None
} else {
val unaccented = StringUtilities.removeAccents(title)
// Remove punctuation
val slug = StringUtilities.removePunctuation((unaccented.toLowerCase())).replaceAll("\\s", "")
if (slug.isEmpty
- || slug == null
- || (ScorableFeatures.SlugBlacklist contains slug)
- || (slug.length < ScorableFeatures.MinSlugLength)) Scorable.NoSlug else slug
+ || slug == null
+ || (ScorableFeatures.SlugBlacklist contains slug)
+ || (slug.length < ScorableFeatures.MinSlugLength)) {
+ None
+ } else {
+ Some(slug)
+ }
}
}
- def toMapFeatures : MapFeatures =
- MapFeatures(toSlug, toString)
+ def toMapFeatures : Option[MapFeatures] =
+ toSlug match {
+ case None => None
+ case Some(slug) => Some(MapFeatures(slug, toString))
+ }
}