diff options
Diffstat (limited to 'scalding')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/StringUtilities.scala | 2 | ||||
| -rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala | 2 | 
2 files changed, 2 insertions, 2 deletions
diff --git a/scalding/src/main/scala/sandcrawler/StringUtilities.scala b/scalding/src/main/scala/sandcrawler/StringUtilities.scala
index e03b60d..9150ced 100644
--- a/scalding/src/main/scala/sandcrawler/StringUtilities.scala
+++ b/scalding/src/main/scala/sandcrawler/StringUtilities.scala
@@ -36,7 +36,7 @@ object StringUtilities {
 
   // Source: https://stackoverflow.com/a/30076541/631051
   def removePunctuation(s: String) : String = {
-    s.replaceAll("""[\p{Punct}’·“”‘’“”«»「」]""", "")
+    s.replaceAll("""[\p{Punct}’·“”‘’“”«»「」¿–±§ʿ]""", "")
   }
 
   // Adapted from: https://stackoverflow.com/a/16018452/631051
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 474f69a..450c169 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -49,7 +49,7 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
   }
 
   it should "strip special characters" in {
-    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」") shouldBe Scorable.NoSlug
+    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」¿–±§ʿ") shouldBe Scorable.NoSlug
     // TODO: titleToSlug("©™₨№…") shouldBe Scorable.NoSlug
     // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
   }
