about summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-11-25 13:00:54 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-11-25 13:00:54 +0100
commit4a9633f0f989f4103a5c35721c5984e21a5d2192 (patch)
tree3f6c819fc951ca16844df06ba61cfab209e74466 /tests
parent368d2f75b5ef80bba822f38632787c2918317200 (diff)
downloadfuzzycat-4a9633f0f989f4103a5c35721c5984e21a5d2192.tar.gz
fuzzycat-4a9633f0f989f4103a5c35721c5984e21a5d2192.zip
move enums into common
Diffstat (limited to 'tests')
-rw-r--r--tests/test_utils.py37
1 file changed, 36 insertions, 1 deletion
diff --git a/tests/test_utils.py b/tests/test_utils.py
index ea188a4..bc0d918 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
import pytest
-from fuzzycat.utils import author_similarity_score, cut, slugify_string
+from fuzzycat.utils import author_similarity_score, cut, slugify_string, jaccard, token_n_grams, tokenize_string, nwise
def test_slugify_string():
@@ -28,3 +28,38 @@ def test_author_similarity_score():
assert author_similarity_score("", "") == 0.0
assert author_similarity_score("Gregor Samsa", "G. Samsa") == 0.42857142857142855
assert author_similarity_score("Geronimo Samsa", "G. Samsa") == 0.375
+
+
+def test_jaccard():
+ assert jaccard(set(), set()) == 0
+ assert jaccard(set(["a"]), set()) == 0
+ assert jaccard(set(["a"]), set(["a"])) == 1.0
+ assert jaccard(set(["a", "b"]), set(["a"])) == 0.5
+ assert jaccard(set(["a"]), set(["a", "b"])) == 0.5
+ assert jaccard(set(["a", "b", "c"]), set(["a", "c"])) == 2 / 3
+
+
+def test_token_n_grams():
+ assert token_n_grams("") == []
+ assert token_n_grams("a") == ["a"]
+ assert token_n_grams("abc") == ["ab", "c"]
+ assert token_n_grams("abc", n=3) == ["abc"]
+ assert token_n_grams("abc", n=1) == ["a", "b", "c"]
+ assert token_n_grams("abc hello world", n=3) == ["abc", "hel", "lo", "wor", "ld"]
+
+
+def test_tokenize_string():
+ assert tokenize_string("") == []
+ assert tokenize_string("a") == ["a"]
+ assert tokenize_string("a b") == ["a", "b"]
+ assert tokenize_string("a b ") == ["a", "b"]
+ assert tokenize_string("a b=c") == ["a", "b=c"]
+ assert tokenize_string("a b 1999") == ["a", "b", "1999"]
+ assert tokenize_string("a?b*1999") == ["a?b*1999"]
+
+
+def test_nwise():
+ assert list(nwise("1234")) == [("1", "2"), ("3", "4")]
+ assert list(nwise("1234", n=1)) == [("1", ), ("2", ), ("3", ), ("4", )]
+ assert list(nwise([1, 2, 3, 4, 5], n=3)) == [(1, 2, 3), (4, 5)]
+