diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/BibjsonScorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/BibjsonScorable.scala | 15 |
1 files changed, 9 insertions, 6 deletions
diff --git a/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala b/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala index 0d26d75..abf9220 100644 --- a/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala +++ b/scalding/src/main/scala/sandcrawler/BibjsonScorable.scala @@ -15,7 +15,7 @@ class BibjsonScorable extends Scorable { TextLine(args("bibjson-input")) } - def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[MapFeatures] = { + def getFeaturesPipe(args : Args)(implicit mode : Mode, flowDef : FlowDef) : TypedPipe[Option[MapFeatures]] = { getSource(args).read .toTypedPipe[String](new Fields("line")) .map { BibjsonScorable.bibjsonToMapFeatures(_) } @@ -23,9 +23,9 @@ class BibjsonScorable extends Scorable { } object BibjsonScorable { - def bibjsonToMapFeatures(json : String) : MapFeatures = { + def bibjsonToMapFeatures(json : String) : Option[MapFeatures] = { Scorable.jsonToMap(json) match { - case None => MapFeatures(Scorable.NoSlug, json) + case None => None case Some(map) => { if (map contains "title") { val title = Scorable.getString(map, "title") @@ -33,13 +33,16 @@ object BibjsonScorable { val sha1 = Scorable.getString(map, "sha") // TODO: year, authors (if available) if (title == null || title.isEmpty) { - new MapFeatures(Scorable.NoSlug, json) + None } else { val sf : ScorableFeatures = ScorableFeatures.create(title=title, doi=doi, sha1=sha1) - new MapFeatures(sf.toSlug, sf.toString) + sf.toSlug match { + case None => None + case Some(slug) => Some(MapFeatures(slug, sf.toString)) + } } } else { - new MapFeatures(Scorable.NoSlug, json) + None } } } |