diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-12 18:08:51 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-12 18:08:51 -0700 |
commit | 31354b1a6062c5c56a30610f68fa48c82a7e83f0 (patch) | |
tree | a730150c3f29ea76579ee6928a7c2db9e5b22eac /scalding/src/main/scala | |
parent | 728e50a33cec921c9a624439f2e1c8561a6e12ce (diff) | |
download | sandcrawler-31354b1a6062c5c56a30610f68fa48c82a7e83f0.tar.gz sandcrawler-31354b1a6062c5c56a30610f68fa48c82a7e83f0.zip |
Tests pass.
Diffstat (limited to 'scalding/src/main/scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 11 |
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 9c8da69..929461b 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -66,13 +66,14 @@ object Scorable {
   // This guarantees it will have all of the fields needed to compute
   // the ultimate score, which are a superset of those needed for a slug.
   def mapToSlug(map : Map[String, Any]) : String = {
-    val unaccented = StringUtilities.removeAccents(getString(map, "title"))
-    // Remove punctuation after splitting on colon.
-    val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
-    if (slug.isEmpty || slug == null) {
+    val title = getString(map, "title")
+    if (title == null) {
       NoSlug
     } else {
-      slug
+      val unaccented = StringUtilities.removeAccents(title)
+      // Remove punctuation after splitting on colon.
+      val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase())).replaceAll("\\s", "")
+      if (slug.isEmpty || slug == null) NoSlug else slug
     }
   }
 
```