From b4eac17049e19d33b1a55664a7258c0f62f8a8c7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 27 Aug 2018 16:40:15 -0700 Subject: make similarity score case-insensitive --- scalding/src/main/scala/sandcrawler/Scorable.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scalding/src/main') diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala index 5aac032..f7eb95d 100644 --- a/scalding/src/main/scala/sandcrawler/Scorable.scala +++ b/scalding/src/main/scala/sandcrawler/Scorable.scala @@ -72,7 +72,7 @@ object Scorable { getStringOption(json2, "title") match { case None => 0 case Some(title2) => - (StringUtilities.similarity(title1, title2) * MaxScore).toInt + (StringUtilities.similarity(title1.toLowerCase, title2.toLowerCase) * MaxScore).toInt } } } -- cgit v1.2.3