aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-27 16:35:52 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-27 16:35:52 -0700
commit 309f40b66d474f12c0cfe60c449d43ae4bacb912 (patch)
tree 510aa40e89d1563f98abd435d07f376487c626ac /scalding/src/main
parent f8a0c99b270ebcd6e239c6e26190cf7200389ced (diff)
downloadsandcrawler-309f40b66d474f12c0cfe60c449d43ae4bacb912.tar.gz
sandcrawler-309f40b66d474f12c0cfe60c449d43ae4bacb912.zip
basic crossref subtitle concatenation support
Diffstat (limited to 'scalding/src/main')
-rw-r--r--scalding/src/main/scala/sandcrawler/CrossrefScorable.scala23
1 files changed, 22 insertions, 1 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index 039fa85..f51c210 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -60,7 +60,28 @@ object CrossrefScorable {
if (titles.isEmpty || titles == null) {
None
} else {
- val title = titles(0)
+ val baseTitle: String = titles(0)
+ // TODO(bnewbold): this code block is horrible
+ val baseSubtitle: String = if (map contains "subtitle") {
+ val subtitles = map("subtitle").asInstanceOf[List[String]]
+ if (!subtitles.isEmpty && subtitles != null) {
+ val sub = subtitles(0)
+ if (sub != null && !sub.isEmpty && baseTitle != null) {
+ sub
+ } else {
+ ""
+ }
+ } else {
+ ""
+ }
+ } else {
+ ""
+ }
+ val title = if (baseSubtitle.isEmpty) {
+ baseTitle
+ } else {
+ baseTitle.concat(": ".concat(baseSubtitle))
+ }
if (title == null || title.isEmpty || title.length > Scorable.MaxTitleLength) None else Some(title)
}
} else {