about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/BibjsonScorable.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/BibjsonScorable.scala 15
1 file changed, 9 insertions, 6 deletions
diff --git a/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala b/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala
index 0d26d75..abf9220 100644
--- a/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala
@@ -15,7 +15,7 @@ class BibjsonScorable extends Scorable {
TextLine(args("bibjson-input"))
}
- def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[MapFeatures] = {
+ def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[Option[MapFeatures]] = {
getSource(args).read
.toTypedPipe[String](new Fields("line"))
.map { BibjsonScorable.bibjsonToMapFeatures(_) }
@@ -23,9 +23,9 @@ class BibjsonScorable extends Scorable {
}
object BibjsonScorable {
- def bibjsonToMapFeatures(json : String) : MapFeatures = {
+ def bibjsonToMapFeatures(json : String) : Option[MapFeatures] = {
Scorable.jsonToMap(json) match {
- case None => MapFeatures(Scorable.NoSlug, json)
+ case None => None
case Some(map) => {
if (map contains "title") {
val title = Scorable.getString(map, "title")
@@ -33,13 +33,16 @@ object BibjsonScorable {
val sha1 = Scorable.getString(map, "sha")
// TODO: year, authors (if available)
if (title == null || title.isEmpty) {
- new MapFeatures(Scorable.NoSlug, json)
+ None
} else {
val sf : ScorableFeatures = ScorableFeatures.create(title=title, doi=doi, sha1=sha1)
- new MapFeatures(sf.toSlug, sf.toString)
+ sf.toSlug match {
+ case None => None
+ case Some(slug) => Some(MapFeatures(slug, sf.toString))
+ }
}
} else {
- new MapFeatures(Scorable.NoSlug, json)
+ None
}
}
}