about summary refs log tree commit diff stats
path: root/fuzzycat
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-11-17 23:04:11 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-11-17 23:04:11 +0100
commit 2ad6ea7f9d2b77ca766caaf9500ad06381177694 (patch)
tree 33cee2b80822868120be98c4770aae621cce68b3 /fuzzycat
parent 01e95738c6d2bbbb4eb07a0e48db7866bfdbc302 (diff)
download fuzzycat-2ad6ea7f9d2b77ca766caaf9500ad06381177694.tar.gz
fuzzycat-2ad6ea7f9d2b77ca766caaf9500ad06381177694.zip
cluster: log progress
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/cluster.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 8eb409c..0aa2fd4 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -379,7 +379,9 @@ class Cluster:
Outline: json -> tsv -> sort -> group -> json
"""
with tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=self.prefix) as tf:
- for line in self.iterable:
+ for i, line in enumerate(self.iterable):
+ if i % 100000 == 0:
+ print("@{}".format(i), file=sys.stderr)
try:
doc = json.loads(line)
id, key = self.key(doc)