diff options
Diffstat (limited to 'scalding/src/main')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 11 | 
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 9c8da69..929461b 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -66,13 +66,14 @@ object Scorable {
   // This guarantees it will have all of the fields needed to compute
   // the ultimate score, which are a superset of those needed for a slug.
   def mapToSlug(map : Map[String, Any]) : String = {
-    val unaccented = StringUtilities.removeAccents(getString(map, "title"))
-    // Remove punctuation after splitting on colon.
-    val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
-    if (slug.isEmpty || slug == null) {
+    val title = getString(map, "title")
+    if (title == null) {
       NoSlug
     } else {
-      slug
+      val unaccented = StringUtilities.removeAccents(title)
+      // Remove punctuation after splitting on colon.
+      val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase())).replaceAll("\\s", "")
+      if (slug.isEmpty || slug == null) NoSlug else slug
     }
   }
 
```
