diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-17 23:04:11 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-17 23:04:11 +0100 |
commit | 2ad6ea7f9d2b77ca766caaf9500ad06381177694 (patch) | |
tree | 33cee2b80822868120be98c4770aae621cce68b3 | |
parent | 01e95738c6d2bbbb4eb07a0e48db7866bfdbc302 (diff) | |
download | fuzzycat-2ad6ea7f9d2b77ca766caaf9500ad06381177694.tar.gz fuzzycat-2ad6ea7f9d2b77ca766caaf9500ad06381177694.zip |
cluster: log progress
-rw-r--r-- | fuzzycat/cluster.py | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 8eb409c..0aa2fd4 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -379,7 +379,9 @@ class Cluster:
         Outline: json -> tsv -> sort -> group -> json
         """
         with tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=self.prefix) as tf:
-            for line in self.iterable:
+            for i, line in enumerate(self.iterable):
+                if i % 100000 == 0:
+                    print("@{}".format(i), file=sys.stderr)
                 try:
                     doc = json.loads(line)
                     id, key = self.key(doc)
```