diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScorableFeatures.scala')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/ScorableFeatures.scala | 34 | 
1 files changed, 21 insertions, 13 deletions
| diff --git a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala index 8ed3369..d9461e7 100644 --- a/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala +++ b/scalding/src/main/scala/sandcrawler/ScorableFeatures.scala @@ -2,12 +2,20 @@ package sandcrawler  import scala.util.parsing.json.JSONObject +object ScorableFeatures { +  def create(title : String, year : Int = 0, doi : String = "", sha1 : String = "") : ScorableFeatures = { +    new ScorableFeatures( +      title=if (title == null) "" else title, +      year=year, +      doi=if (doi == null) "" else doi, +      sha1=if (sha1 == null) "" else sha1) +  } +}  // Contains features needed to make slug and to score (in combination -// with a second ScorableFeatures). -class ScorableFeatures(title : String, year: Int = 0, doi : String = "", sha1: String = "") { - -  val slugBlacklist = Set( "abbreviations", "abstract", "acknowledgements", +// with a second ScorableFeatures). Create with above static factory method. +class ScorableFeatures private(title : String, year: Int = 0, doi : String = "", sha1: String = "") { +  val SlugBlacklist = Set( "abbreviations", "abstract", "acknowledgements",      "article", "authorreply", "authorsreply", "bookreview", "bookreviews",      "casereport", "commentary", "commentaryon", "commenton", "commentto",      "contents", "correspondence", "dedication", "editorialadvisoryboard", @@ -16,15 +24,15 @@ class ScorableFeatures(title : String, year: Int = 0, doi : String = "", sha1: S      "references", "results", "review", "reviewarticle", "summary", "title",      "name") -  def toMap() : Map[String, Any] = { -    Map("title" -> (if (title == null) "" else title), -        "year" -> year, -        "doi" -> (if (doi == null) "" else doi), -        "sha1" -> (if (sha1 == null) "" else sha1)) -  } +  def toMap() : Map[String, Any] = Map("title" -> title, "year" -> year, "doi" -> doi, "sha1" -> sha1)    override def toString() : String = { -    JSONObject(toMap()).toString +    val myMap = toMap() +    assert(myMap("title") != null) +    assert(myMap("year") != null) +    assert(myMap("doi") != null) +    assert(myMap("sha1") != null) +    JSONObject(myMap).toString    }    def toSlug() : String = { @@ -34,11 +42,11 @@ class ScorableFeatures(title : String, year: Int = 0, doi : String = "", sha1: S        val unaccented = StringUtilities.removeAccents(title)        // Remove punctuation        val slug = StringUtilities.removePunctuation((unaccented.toLowerCase())).replaceAll("\\s", "") -      if (slug.isEmpty || slug == null || (slugBlacklist contains slug)) Scorable.NoSlug else slug +      if (slug.isEmpty || slug == null || (SlugBlacklist contains slug)) Scorable.NoSlug else slug      }    } -  def toMapFeatures = { +  def toMapFeatures : MapFeatures = {      MapFeatures(toSlug, toString)    }  } | 
