about | summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/data/zstd/empty.txt 0
-rw-r--r-- tests/data/zstd/empty.txt.zst bin 0 -> 13 bytes
-rw-r--r-- tests/data/zstd/lines.txt 9
-rw-r--r-- tests/data/zstd/lines.txt.zst bin 0 -> 31 bytes
-rw-r--r-- tests/data/zstd/single.txt 1
-rw-r--r-- tests/data/zstd/single.txt.zst bin 0 -> 18 bytes
-rw-r--r-- tests/test_cluster.py 13
-rw-r--r-- tests/test_utils.py 16
8 files changed, 31 insertions, 8 deletions
diff --git a/tests/data/zstd/empty.txt b/tests/data/zstd/empty.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/data/zstd/empty.txt
diff --git a/tests/data/zstd/empty.txt.zst b/tests/data/zstd/empty.txt.zst
new file mode 100644
index 0000000..e58c09d
--- /dev/null
+++ b/tests/data/zstd/empty.txt.zst
Binary files differ
diff --git a/tests/data/zstd/lines.txt b/tests/data/zstd/lines.txt
new file mode 100644
index 0000000..0719398
--- /dev/null
+++ b/tests/data/zstd/lines.txt
@@ -0,0 +1,9 @@
+1
+2
+3
+4
+5
+6
+7
+8
+9
diff --git a/tests/data/zstd/lines.txt.zst b/tests/data/zstd/lines.txt.zst
new file mode 100644
index 0000000..bc9be49
--- /dev/null
+++ b/tests/data/zstd/lines.txt.zst
Binary files differ
diff --git a/tests/data/zstd/single.txt b/tests/data/zstd/single.txt
new file mode 100644
index 0000000..4b37d57
--- /dev/null
+++ b/tests/data/zstd/single.txt
@@ -0,0 +1 @@
+zzzz
diff --git a/tests/data/zstd/single.txt.zst b/tests/data/zstd/single.txt.zst
new file mode 100644
index 0000000..47e377f
--- /dev/null
+++ b/tests/data/zstd/single.txt.zst
Binary files differ
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
index 3ad32a7..793798b 100644
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@@ -109,20 +109,19 @@ def test_release_key_title_nysiis():
def test_cluster():
sio = io.StringIO()
- cluster = Cluster([
- json.dumps(line) for line in [
+ lines = [
+ json.dumps(doc) for doc in [
{
"title": "hello world",
- "ident": 1
+ "ident": 1,
},
{
"title": "hello world!",
- "ident": 2
+ "ident": 2,
},
]
- ],
- release_key_title_normalized,
- output=sio)
+ ]
+ cluster = Cluster(lines, release_key_title_normalized, output=sio)
stats = cluster.run()
assert stats == {
"key_fail": 0,
diff --git a/tests/test_utils.py b/tests/test_utils.py
index fa930fe..29b125b 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,9 @@
import pytest
+import os
from fuzzycat.utils import (author_similarity_score, cut, jaccard, nwise, slugify_string,
- token_n_grams, tokenize_string, parse_page_string, dict_key_exists)
+ token_n_grams, tokenize_string, parse_page_string, dict_key_exists,
+ zstdlines)
def test_slugify_string():
@@ -84,3 +86,15 @@ def test_page_page_string():
assert parse_page_string("123-125") == (123, 125, 3)
assert parse_page_string("123-124a") == (123, 124, 2)
assert parse_page_string("1-1000") == (1, 1000, 1000)
+
+
+def test_zstdlines():
+ test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/zstd")
+ examples = (
+ (os.path.join(test_dir, "lines.txt.zst"), os.path.join(test_dir, "lines.txt")),
+ (os.path.join(test_dir, "empty.txt.zst"), os.path.join(test_dir, "empty.txt")),
+ (os.path.join(test_dir, "single.txt.zst"), os.path.join(test_dir, "single.txt")),
+ )
+ for zfn, fn in examples:
+ with open(fn) as f:
+ assert [s.strip() for s in f.readlines()] == list(zstdlines(zfn))