about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r-- python/fatcat_tools/importers/common.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index cd51a24c..2136d1da 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -916,3 +916,14 @@ def make_kafka_consumer(
)
print("Consuming from kafka topic {}, group {}".format(topic_name, group))
return consumer
+
+
+def filesets_very_similar(a: FilesetEntity, b: FilesetEntity) -> bool:
+    """
+    Return True when Fileset entities `a` and `b` are effectively equivalent: their manifests hold the same set of SHA1 hashes.
+
+    File order and duplicates are ignored. A missing (None) manifest is treated as empty instead of raising TypeError.
+    """
+    a_hashes = {f.sha1 for f in (a.manifest or [])}
+    b_hashes = {f.sha1 for f in (b.manifest or [])}
+    return a_hashes == b_hashes