From 2ad6ea7f9d2b77ca766caaf9500ad06381177694 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Tue, 17 Nov 2020 23:04:11 +0100
Subject: cluster: log progress

---
 fuzzycat/cluster.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 8eb409c..0aa2fd4 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -379,7 +379,9 @@ class Cluster:
         Outline: json -> tsv -> sort -> group -> json
         """
         with tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=self.prefix) as tf:
-            for line in self.iterable:
+            for i, line in enumerate(self.iterable):
+                if i % 100000 == 0:
+                    print("@{}".format(i), file=sys.stderr)
                 try:
                     doc = json.loads(line)
                     id, key = self.key(doc)
-- 
cgit v1.2.3