diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/Scorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 5d67044..d9c38e8 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -60,6 +60,25 @@ object Scorable {
 
   val MaxScore = 1000
 
+  /** True only when both feature records have a "fatcat_release", those releases
+   *  differ, both have a "fatcat_work", and the first work compares strictly
+   *  greater than the second; false in every other case.
+   */
+  def selfMatchable(features1 : ReduceFeatures, features2 : ReduceFeatures) : Boolean = {
+    val left = jsonToMap(features1.json)
+    val right = jsonToMap(features2.json)
+
+    val verdict = for {
+      release1 <- getStringOption(left, "fatcat_release")
+      release2 <- getStringOption(right, "fatcat_release")
+      if release1 != release2
+      work1 <- getStringOption(left, "fatcat_work")
+      work2 <- getStringOption(right, "fatcat_work")
+    } yield work1 > work2 // strict ordering ensures we don't double-match
+
+    verdict.getOrElse(false)
+  }
+
   def computeSimilarity(features1 : ReduceFeatures, features2 : ReduceFeatures) : Int = {
     val json1 = jsonToMap(features1.json)
     val json2 = jsonToMap(features2.json)