diff options
Diffstat (limited to 'scalding/src/main')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/ScorableFeatures.scala | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala index 696b2ef..8ed3369 100644 --- a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala +++ b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala @@ -32,8 +32,8 @@ class ScorableFeatures(title : String, year: Int = 0, doi : String = "", sha1: S Scorable.NoSlug } else { val unaccented = StringUtilities.removeAccents(title) - // Remove punctuation after splitting on colon. - val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase())).replaceAll("\\s", "") + // Remove punctuation + val slug = StringUtilities.removePunctuation((unaccented.toLowerCase())).replaceAll("\\s", "") if (slug.isEmpty || slug == null || (slugBlacklist contains slug)) Scorable.NoSlug else slug } } |