about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- scalding/src/main/scala/sandcrawler/Scorable.scala | 2
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableTest.scala | 8
2 files changed, 9 insertions, 1 deletion
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 5aac032..f7eb95d 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -72,7 +72,7 @@ object Scorable {
getStringOption(json2, "title") match {
case None => 0
case Some(title2) =>
- (StringUtilities.similarity(title1, title2) * MaxScore).toInt
+ (StringUtilities.similarity(title1.toLowerCase, title2.toLowerCase) * MaxScore).toInt
}
}
}
diff --git a/scalding/src/test/scala/sandcrawler/ScorableTest.scala b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
index f63bef8..2094543 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
@@ -70,4 +70,12 @@ class ScorableTest extends FlatSpec with Matchers {
new ReduceFeatures(JsonString), new ReduceFeatures(JsonString))
score shouldBe Scorable.MaxScore
}
+
+ "computeOutput()" should "be case-insensitive" in {
+ val left = JsonString.replace("<<TITLE>>", "A TITLE UPPER CASE")
+ val right = JsonString.replace("<<TITLE>>", "a title upper case")
+ val score = Scorable.computeSimilarity(
+ new ReduceFeatures(left), new ReduceFeatures(right))
+ score shouldBe Scorable.MaxScore
+ }
}