diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/data/zstd/empty.txt | 0 | ||||
-rw-r--r-- | tests/data/zstd/empty.txt.zst | bin | 0 -> 13 bytes | |||
-rw-r--r-- | tests/data/zstd/lines.txt | 9 | ||||
-rw-r--r-- | tests/data/zstd/lines.txt.zst | bin | 0 -> 31 bytes | |||
-rw-r--r-- | tests/data/zstd/single.txt | 1 | ||||
-rw-r--r-- | tests/data/zstd/single.txt.zst | bin | 0 -> 18 bytes | |||
-rw-r--r-- | tests/test_cluster.py | 13 | ||||
-rw-r--r-- | tests/test_utils.py | 16 |
8 files changed, 31 insertions, 8 deletions
diff --git a/tests/data/zstd/empty.txt b/tests/data/zstd/empty.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/data/zstd/empty.txt diff --git a/tests/data/zstd/empty.txt.zst b/tests/data/zstd/empty.txt.zst Binary files differ new file mode 100644 index 0000000..e58c09d --- /dev/null +++ b/tests/data/zstd/empty.txt.zst diff --git a/tests/data/zstd/lines.txt b/tests/data/zstd/lines.txt new file mode 100644 index 0000000..0719398 --- /dev/null +++ b/tests/data/zstd/lines.txt @@ -0,0 +1,9 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/data/zstd/lines.txt.zst b/tests/data/zstd/lines.txt.zst Binary files differ new file mode 100644 index 0000000..bc9be49 --- /dev/null +++ b/tests/data/zstd/lines.txt.zst diff --git a/tests/data/zstd/single.txt b/tests/data/zstd/single.txt new file mode 100644 index 0000000..4b37d57 --- /dev/null +++ b/tests/data/zstd/single.txt @@ -0,0 +1 @@ +zzzz diff --git a/tests/data/zstd/single.txt.zst b/tests/data/zstd/single.txt.zst Binary files differ new file mode 100644 index 0000000..47e377f --- /dev/null +++ b/tests/data/zstd/single.txt.zst diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 3ad32a7..793798b 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -109,20 +109,19 @@ def test_release_key_title_nysiis(): def test_cluster(): sio = io.StringIO() - cluster = Cluster([ - json.dumps(line) for line in [ + lines = [ + json.dumps(doc) for doc in [ { "title": "hello world", - "ident": 1 + "ident": 1, }, { "title": "hello world!", - "ident": 2 + "ident": 2, }, ] - ], - release_key_title_normalized, - output=sio) + ] + cluster = Cluster(lines, release_key_title_normalized, output=sio) stats = cluster.run() assert stats == { "key_fail": 0, diff --git a/tests/test_utils.py b/tests/test_utils.py index fa930fe..29b125b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,9 @@ import pytest +import os from fuzzycat.utils import (author_similarity_score, cut, jaccard, nwise, 
slugify_string, - token_n_grams, tokenize_string, parse_page_string, dict_key_exists) + token_n_grams, tokenize_string, parse_page_string, dict_key_exists, + zstdlines) def test_slugify_string(): @@ -84,3 +86,15 @@ def test_page_page_string(): assert parse_page_string("123-125") == (123, 125, 3) assert parse_page_string("123-124a") == (123, 124, 2) assert parse_page_string("1-1000") == (1, 1000, 1000) + + +def test_zstdlines(): + test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/zstd") + examples = ( + (os.path.join(test_dir, "lines.txt.zst"), os.path.join(test_dir, "lines.txt")), + (os.path.join(test_dir, "empty.txt.zst"), os.path.join(test_dir, "empty.txt")), + (os.path.join(test_dir, "single.txt.zst"), os.path.join(test_dir, "single.txt")), + ) + for zfn, fn in examples: + with open(fn) as f: + assert [s.strip() for s in f.readlines()] == list(zstdlines(zfn)) |