diff options
-rw-r--r-- | fuzzycat/cluster.py | 4 |
1 file changed, 3 insertions, 1 deletion
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py index 8eb409c..0aa2fd4 100644 --- a/fuzzycat/cluster.py +++ b/fuzzycat/cluster.py @@ -379,7 +379,9 @@ class Cluster: Outline: json -> tsv -> sort -> group -> json """ with tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=self.prefix) as tf: - for line in self.iterable: + for i, line in enumerate(self.iterable): + if i % 100000 == 0: + print("@{}".format(i), file=sys.stderr) try: doc = json.loads(line) id, key = self.key(doc) |