aboutsummaryrefslogtreecommitdiffstats
path: root/scalding
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-23 17:17:04 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-23 17:17:06 -0700
commit 0e847f052b3347e74921361fa2ab001d54c75105 (patch)
tree 8894e335ebdc5bf36faad08b6ecb707af3c8056d /scalding
parent 1f0e447cfb4c739a9a39b9af27a0c86d2d46bc7e (diff)
download sandcrawler-0e847f052b3347e74921361fa2ab001d54c75105.tar.gz
sandcrawler-0e847f052b3347e74921361fa2ab001d54c75105.zip
increase MaxTitleLength from 255 to 1023
Motivated after finding some long titles with MathML mixed in. Until this issue can be investigated further, bumping this limit to pass the handful of matches found.
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/main/scala/sandcrawler/Scorable.scala 2
1 files changed, 1 insertions, 1 deletions
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index c704ed9..5aac032 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -30,7 +30,7 @@ abstract class Scorable {
}
object Scorable {
- val MaxTitleLength = 255
+ val MaxTitleLength = 1023
val NoSlug = "NO SLUG" // Used for slug if title is empty or unparseable
def isValidSlug(slug : String) : Boolean = {