diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 22:43:10 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 22:43:10 -0700 | 
| commit | 2277c2f793a007fa3a347af23fca35f4a3eafeef (patch) | |
| tree | a80cfc82286e0bf5ed709b7c8bea4d45d3dbc49e /scalding/src/main | |
| parent | 3f668933d71b82555e89a3bfefe83039ff7ddbfb (diff) | |
| download | sandcrawler-2277c2f793a007fa3a347af23fca35f4a3eafeef.tar.gz sandcrawler-2277c2f793a007fa3a347af23fca35f4a3eafeef.zip | |
do strip periods ('.')
Diffstat (limited to 'scalding/src/main')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/StringUtilities.scala | 2 | 
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/scalding/src/main/scala/sandcrawler/StringUtilities.scala b/scalding/src/main/scala/sandcrawler/StringUtilities.scala
index 6eeff7e..2745875 100644
--- a/scalding/src/main/scala/sandcrawler/StringUtilities.scala
+++ b/scalding/src/main/scala/sandcrawler/StringUtilities.scala
@@ -36,7 +36,7 @@ object StringUtilities {
 
   // Source: https://stackoverflow.com/a/30076541/631051
   def removePunctuation(s: String) : String = {
-    s.replaceAll("""[\p{Punct}&&[^.]]""", "")
+    s.replaceAll("""[\p{Punct}]""", "")
   }
 
   // Adapted from: https://stackoverflow.com/a/16018452/631051
```
