diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScorableFeatures.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/ScorableFeatures.scala | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala index 9eb03f7..241db79 100644 --- a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala +++ b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala @@ -3,6 +3,7 @@ package sandcrawler import java.io.InputStream import scala.io.Source +import scala.util.parsing.json.JSONArray import scala.util.parsing.json.JSONObject object ScorableFeatures { @@ -13,9 +14,10 @@ object ScorableFeatures { val MinSlugLength = 8 // Static factory method - def create(title : String, year : Int = 0, doi : String = "", sha1 : String = "") : ScorableFeatures = { + def create(title : String, authors : List[Any] = List(), year : Int = 0, doi : String = "", sha1 : String = "") : ScorableFeatures = { new ScorableFeatures( title=if (title == null) "" else title, + authors=if (authors == null) List() else authors.map(a => if (a == null) "" else a), year=year, doi=if (doi == null) "" else doi, sha1=if (sha1 == null) "" else sha1) @@ -24,13 +26,14 @@ object ScorableFeatures { // Contains features needed to make slug and to score (in combination // with a second ScorableFeatures). Create with above static factory method. -class ScorableFeatures private(title : String, year: Int = 0, doi : String = "", sha1: String = "") { +class ScorableFeatures private(title : String, authors : List[Any] = List(), year: Int = 0, doi : String = "", sha1: String = "") { def toMap() : Map[String, Any] = - Map("title" -> title, "year" -> year, "doi" -> doi, "sha1" -> sha1) + Map("title" -> title, "authors" -> JSONArray(authors), "year" -> year, "doi" -> doi, "sha1" -> sha1) - override def toString() : String = + override def toString() : String = { JSONObject(toMap).toString + } def toSlug() : String = { if (title == null) { |