about | summary | refs | log | tree | commit | diff | stats
path: root/tests/test_cluster.py
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-11-05 16:02:38 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-11-05 16:02:38 +0100
commit a125f6d1354bb2e38e774c7e204d8a640555fca0 (patch)
tree 5396f9f5b1ddc6c31979c1f0ad5bf52cd63a32bf /tests/test_cluster.py
parent 242ad04f821294b27e1cbc85beed06099a764d5f (diff)
download fuzzycat-a125f6d1354bb2e38e774c7e204d8a640555fca0.tar.gz
fuzzycat-a125f6d1354bb2e38e774c7e204d8a640555fca0.zip
add cluster tests
Diffstat (limited to 'tests/test_cluster.py')
-rw-r--r-- tests/test_cluster.py 138
1 files changed, 119 insertions, 19 deletions
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
index aa015a6..7439e15 100644
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@@ -1,9 +1,12 @@
import pytest
import collections
-from fuzzycat.cluster import release_key_title, release_key_title_normalized, release_key_title_nysiis
+import tempfile
+from fuzzycat.cluster import release_key_title, release_key_title_normalized, release_key_title_nysiis, sort_by_column, group_by
+import os
Case = collections.namedtuple("Case", 'input output')
+
def test_release_key_title():
with pytest.raises(KeyError):
release_key_title({})
@@ -14,9 +17,18 @@ def test_release_key_title():
with pytest.raises(ValueError, match='title.*missing'):
release_key_title({'ident': '', 'title': ''})
cases = (
- Case(input={'ident': '', 'title': 'simhash'}, output=('', 'simhash')),
- Case(input={'ident': '', 'title': 'Simhash'}, output=('', 'Simhash')),
- Case(input={'ident': '', 'title': 'Sim hash'}, output=('', 'Sim hash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'simhash'
+ }, output=('', 'simhash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Simhash'
+ }, output=('', 'Simhash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Sim hash'
+ }, output=('', 'Sim hash')),
)
for case in cases:
assert case.output == release_key_title(case.input)
@@ -24,25 +36,113 @@ def test_release_key_title():
def test_release_key_title_normalized():
cases = (
- Case(input={'ident': '', 'title': 'simhash'}, output=('', 'simhash')),
- Case(input={'ident': '', 'title': 'Simhash'}, output=('', 'simhash')),
- Case(input={'ident': '', 'title': 'Sim hash'}, output=('', 'simhash')),
- Case(input={'ident': '', 'title': 'THE year 1929'}, output=('', 'theyear1929')),
- Case(input={'ident': '', 'title': '2019?'}, output=('', '2019')),
- Case(input={'ident': '123', 'title': 'H~~2019?'}, output=('123', 'h2019')),
+ Case(input={
+ 'ident': '',
+ 'title': 'simhash'
+ }, output=('', 'simhash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Simhash'
+ }, output=('', 'simhash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Sim hash'
+ }, output=('', 'simhash')),
+ Case(input={
+ 'ident': '',
+ 'title': 'THE year 1929'
+ }, output=('', 'theyear1929')),
+ Case(input={
+ 'ident': '',
+ 'title': '2019?'
+ }, output=('', '2019')),
+ Case(input={
+ 'ident': '123',
+ 'title': 'H~~2019?'
+ }, output=('123', 'h2019')),
)
for case in cases:
- assert case.output == release_key_title_normalized(case.input), 'failed case {}'.format(case.input)
+ assert case.output == release_key_title_normalized(case.input), 'failed case {}'.format(
+ case.input)
+
def test_release_key_title_nysiis():
cases = (
- Case(input={'ident': '', 'title': 'simhash'}, output=('', 'SANAS')),
- Case(input={'ident': '', 'title': 'Simhash'}, output=('', 'SANAS')),
- Case(input={'ident': '', 'title': 'Sim hash'}, output=('', 'SANAS')),
- Case(input={'ident': '', 'title': 'THE year 1929'}, output=('', 'TAR')),
- Case(input={'ident': '', 'title': '2019?'}, output=('', '')),
- Case(input={'ident': '123', 'title': 'H~~2019?'}, output=('123', 'H')),
- Case(input={'ident': '123', 'title': '世界'}, output=('123', '')),
+ Case(input={
+ 'ident': '',
+ 'title': 'simhash'
+ }, output=('', 'SANAS')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Simhash'
+ }, output=('', 'SANAS')),
+ Case(input={
+ 'ident': '',
+ 'title': 'Sim hash'
+ }, output=('', 'SANAS')),
+ Case(input={
+ 'ident': '',
+ 'title': 'THE year 1929'
+ }, output=('', 'TAR')),
+ Case(input={
+ 'ident': '',
+ 'title': '2019?'
+ }, output=('', '')),
+ Case(input={
+ 'ident': '123',
+ 'title': 'H~~2019?'
+ }, output=('123', 'H')),
+ Case(input={
+ 'ident': '123',
+ 'title': '世界'
+ }, output=('123', '')),
)
for case in cases:
- assert case.output == release_key_title_nysiis(case.input), 'failed case {}'.format(case.input)
+ assert case.output == release_key_title_nysiis(case.input), 'failed case {}'.format(
+ case.input)
+
+
+def test_sort_by_column():
+ with tempfile.NamedTemporaryFile(delete=False, mode="w") as tf:
+ tf.write("2 b\n")
+ tf.write("2 a\n")
+ tf.write("9 d\n")
+ tf.write("1 c\n")
+
+ fn = sort_by_column(tf.name, opts='-k 2')
+ with open(fn) as f:
+ lines = [v.strip() for v in f]
+ assert lines == ['2 a', '2 b', '1 c', '9 d']
+
+ fn = sort_by_column(tf.name, opts='-k 1')
+ with open(fn) as f:
+ lines = [v.strip() for v in f]
+ assert lines == ['1 c', '2 a', '2 b', '9 d']
+
+ fn = sort_by_column(tf.name, opts='-k 3')
+ with open(fn) as f:
+ lines = [v.strip() for v in f]
+ assert lines == ['1 c', '2 a', '2 b', '9 d']
+
+
+def test_group_by():
+ Case = collections.namedtuple("Case", "seq keyfunc valuefunc result")
+ cases = (
+ Case(["0", "1"], lambda v: v, lambda v: v, [{
+ 'k': '0',
+ 'v': ['0']
+ }, {
+ 'k': '1',
+ 'v': ['1']
+ }]),
+ Case(["a 1", "a 2", "b 3"], lambda v: v.split()[0], lambda v: v.split()[1], [{
+ 'k': 'a',
+ 'v': ['1', '2']
+ }, {
+ 'k': 'b',
+ 'v': ['3']
+ }]),
+ )
+
+ for case in cases:
+ assert case.result == list(group_by(case.seq, case.keyfunc, case.valuefunc))