diff options
| author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-07-28 20:05:17 -0700 | 
|---|---|---|
| committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-07-28 20:05:17 -0700 | 
| commit | dd0df0fe3574352011d6a0fe3c12e59b0a4b8259 (patch) | |
| tree | 951ab504ce4e00ddfe79221c4ffdf1f9768f3368 /scalding/src/test | |
| parent | 304196e01e69826047e5e14af949d5efc80d1ece (diff) | |
| download | sandcrawler-dd0df0fe3574352011d6a0fe3c12e59b0a4b8259.tar.gz sandcrawler-dd0df0fe3574352011d6a0fe3c12e59b0a4b8259.zip | |
Added accent removal to titleToSlug().
Diffstat (limited to 'scalding/src/test')
| -rw-r--r-- | scalding/src/test/scala/sandcrawler/HBaseCrossrefScoreTest.scala | 25 | 
1 file changed, 24 insertions, 1 deletion
| diff --git a/scalding/src/test/scala/sandcrawler/HBaseCrossrefScoreTest.scala b/scalding/src/test/scala/sandcrawler/HBaseCrossrefScoreTest.scala index e4cab95..655dda1 100644 --- a/scalding/src/test/scala/sandcrawler/HBaseCrossrefScoreTest.scala +++ b/scalding/src/test/scala/sandcrawler/HBaseCrossrefScoreTest.scala @@ -162,7 +162,30 @@ class HBaseCrossrefScoreTest extends FlatSpec with Matchers {    it should "return None if given a malformed json string" in {      val slug = HBaseCrossrefScore.grobidToSlug(MalformedCrossrefString) -     slug shouldBe None +    slug shouldBe None +  } + +  "removeAccents()" should "handle the empty string" in { +    HBaseCrossrefScore.removeAccents("") shouldBe "" +  } + +  it should "not change a string with unaccented characters" in { +    HBaseCrossrefScore.removeAccents("abc123") shouldBe "abc123" +  } + +  it should "remove accents from Ls" in { +    HBaseCrossrefScore.removeAccents("E\u0141\u0142en") shouldBe "ELlen" +  } + +  it should "remove accents from Es without changing case" in { +    val result = HBaseCrossrefScore.removeAccents("\u00e9") +    result should have length 1 +    result shouldBe "e" +  } + +  it should "convert the ø in Soren" in { +    HBaseCrossrefScore.removeAccents("Søren") shouldBe "Soren" +    HBaseCrossrefScore.removeAccents("SØREN") shouldBe "SOREN"    }    //  Pipeline tests | 
