about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-11-07 00:53:03 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-07 00:53:03 +0100
commit: 3963772ae9401395eb17fca16de9a27fe7bf6681 (patch)
tree: b1866529839f72c632c7085ee51bfc22f9691370
parent: e5c8e80fa246899fe95008fe7b599b6efe0e686e (diff)
download: fuzzycat-3963772ae9401395eb17fca16de9a27fe7bf6681.tar.gz
fuzzycat-3963772ae9401395eb17fca16de9a27fe7bf6681.zip
emit title slug
-rw-r--r-- fuzzycat/build.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/fuzzycat/build.py b/fuzzycat/build.py
index a37453d..ee128c8 100644
--- a/fuzzycat/build.py
+++ b/fuzzycat/build.py
@@ -99,6 +99,6 @@ class NgramLookup:
tokens = [tok for tok in word_tokenize(title.lower()) if tok not in self.stopwords]
prefix = "-".join(tokens[:self.n])
suffix = "-".join(tokens[-self.n:])
- print("{}\t{}\t{}".format(id, prefix, suffix))
+ print("{}\t{}\t{}\t{}-{}".format(id, prefix, suffix, prefix, suffix))
except KeyError as exc:
print("skipping doc w/o title: {} - {}".format(line, exc), file=sys.stderr)