diff options
Diffstat (limited to 'scalding/src')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 2 | ||||
-rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableTest.scala | 8 |
2 files changed, 9 insertions, 1 deletion
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 5aac032..f7eb95d 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -72,7 +72,7 @@ object Scorable {
         getStringOption(json2, "title") match {
           case None => 0
           case Some(title2) =>
-            (StringUtilities.similarity(title1, title2) * MaxScore).toInt
+            (StringUtilities.similarity(title1.toLowerCase, title2.toLowerCase) * MaxScore).toInt
         }
     }
   }
diff --git a/scalding/src/test/scala/sandcrawler/ScorableTest.scala b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
index f63bef8..2094543 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
@@ -70,4 +70,12 @@ class ScorableTest extends FlatSpec with Matchers {
       new ReduceFeatures(JsonString), new ReduceFeatures(JsonString))
     score shouldBe Scorable.MaxScore
   }
+
+  "computeOutput()" should "be case-insensitive" in {
+    val left = JsonString.replace("<<TITLE>>", "A TITLE UPPER CASE")
+    val right = JsonString.replace("<<TITLE>>", "a title upper case")
+    val score = Scorable.computeSimilarity(
+      new ReduceFeatures(left), new ReduceFeatures(right))
+    score shouldBe Scorable.MaxScore
+  }
 }