about summary refs log tree commit diff stats
path: root/scalding/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main')
-rw-r--r-- scalding/src/main/scala/sandcrawler/Scorable.scala | 11
1 files changed, 6 insertions, 5 deletions
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 9c8da69..929461b 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -66,13 +66,14 @@ object Scorable {
// This guarantees it will have all of the fields needed to compute
// the ultimate score, which are a superset of those needed for a slug.
def mapToSlug(map : Map[String, Any]) : String = {
- val unaccented = StringUtilities.removeAccents(getString(map, "title"))
- // Remove punctuation after splitting on colon.
- val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
- if (slug.isEmpty || slug == null) {
+ val title = getString(map, "title")
+ if (title == null) {
NoSlug
} else {
- slug
+ val unaccented = StringUtilities.removeAccents(title)
+ // Remove punctuation after splitting on colon.
+ val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase())).replaceAll("\\s", "")
+ if (slug.isEmpty || slug == null) NoSlug else slug
}
}