diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-22 12:52:03 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-22 12:52:03 -0700 |
commit | f54e47ace6dd041e78e10ee25573c6ad3de808eb (patch) | |
tree | 21931c4efd9997d69227623aba0cff224907dd44 /scalding/src/main/scala/sandcrawler/Scorable.scala | |
parent | b628b7026ab8e7abf4beeaaad99d831b49578483 (diff) | |
download | sandcrawler-f54e47ace6dd041e78e10ee25573c6ad3de808eb.tar.gz sandcrawler-f54e47ace6dd041e78e10ee25573c6ad3de808eb.zip |
Added title length filtering to GrobidScorable
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/Scorable.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 9b9c633..c704ed9 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -30,6 +30,7 @@ abstract class Scorable {
 }
 
 object Scorable {
+  val MaxTitleLength = 255
   val NoSlug = "NO SLUG" // Used for slug if title is empty or unparseable
 
   def isValidSlug(slug : String) : Boolean = {