aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-07 11:05:23 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-07 11:05:23 -0700
commit 8dc3bf5c6f68d1fffa9f940ba1024ed95e76ed64 (patch)
tree f515c25882aebeb5edb8d8a13e06e457e19a4fb4 /scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
parent 408123177b9e8afd145ea0f0fa1d6bb449f1bd20 (diff)
download sandcrawler-8dc3bf5c6f68d1fffa9f940ba1024ed95e76ed64.tar.gz
sandcrawler-8dc3bf5c6f68d1fffa9f940ba1024ed95e76ed64.zip
Added GrobidScorableTest, minor improvements.
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/CrossrefScorable.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/CrossrefScorable.scala 19
1 files changed, 18 insertions, 1 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index 0849aff..cf5849c 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -15,10 +15,27 @@ class CrossrefScorable extends Scorable {
.read
.toTypedPipe[String](new Fields("line"))
.map{ json : String =>
- HBaseCrossrefScore.crossrefToSlug(json) match {
+ CrossrefScorable.crossrefToSlug(json) match {
case Some(slug) => new MapFeatures(slug, json)
case None => new MapFeatures(Scorable.NoSlug, json)
}
}
}
}
+
+object CrossrefScorable {
+  /**
+   * Derives the slug for a Crossref JSON record from its first title.
+   *
+   * @param json one Crossref record serialized as a JSON string
+   * @return Some(slug), or None when Scorable.jsonToMap yields None or when
+   *         "title" is missing, empty, or not a list starting with a string
+   */
+  def crossrefToSlug(json : String) : Option[String] = {
+    // TODO: Don't ignore titles after the first.
+    // Match on the shape instead of casting so malformed records yield None.
+    Scorable.jsonToMap(json).flatMap(_.get("title")).collect {
+      case (title : String) :: _ => Scorable.titleToSlug(title)
+    }
+  }
+}