about summary refs log tree commit diff stats
path: root/fuzzycat/cluster.py
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-11-13 02:14:26 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-11-13 02:14:26 +0100
commit 1f91606501754bf8d3fa8b3075a05c147470c7bb (patch)
tree 87e7a93e825fca250835533d536b597323659436 /fuzzycat/cluster.py
parent 4eab32b1c5929b2d7f2e6d8fed76bdb49bf9c699 (diff)
download fuzzycat-1f91606501754bf8d3fa8b3075a05c147470c7bb.tar.gz
fuzzycat-1f91606501754bf8d3fa8b3075a05c147470c7bb.zip
wip: verification
Output currently (1m sample): { "unique": 916075, "too_large": 575, "dummy": 10307, "contrib_miss": 27215, "short_title": 1379, "arxiv_v": 8943 }
Diffstat (limited to 'fuzzycat/cluster.py')
-rw-r--r-- fuzzycat/cluster.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 87b010e..8eb409c 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -106,7 +106,6 @@ class KeyDoc:
"""
ident: str
title: str
- contribs: List[Contrib] = field(default_factory=list)
@dataclass
@@ -430,7 +429,8 @@ class Cluster:
key: Callable[[Any], str] = None) -> Generator[Any, None, None]:
"""
Extract a key from elements of an iterable and group them. Just as
- uniq(1), the iterable must be ordered for this to work.
+ uniq(1), the iterable must be ordered (by the key that is extracted)
+ for this to work.
"""
for k, g in itertools.groupby(seq, key=key):
items = list(g)