aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/common.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2022-03-23 17:55:54 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2022-03-23 18:38:56 -0700
commit 515b5ecc6e75aae834958d74883426230532f10d (patch)
tree 1d5e00696bf5272e7bda7c4ff275d9e8acaaa479 /python/fatcat_tools/importers/common.py
parent ea6ccd227e0f62f5f9e7a66ba8bc90b18a2ca097 (diff)
download fatcat-515b5ecc6e75aae834958d74883426230532f10d.tar.gz
fatcat-515b5ecc6e75aae834958d74883426230532f10d.zip
ingest fileset fixes, and some test coverage
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r-- python/fatcat_tools/importers/common.py 11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index cd51a24c..475cb97a 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -916,3 +916,14 @@ def make_kafka_consumer(
)
print("Consuming from kafka topic {}, group {}".format(topic_name, group))
return consumer
+
+
def filesets_very_similar(a: FilesetEntity, b: FilesetEntity) -> bool:
    """
    Check whether two Fileset entities are effectively equivalent: the same
    set of files, as identified by their hashes.

    Only the `sha1` attribute of each entry in `manifest` is considered. The
    hashes of each entity are collected into a set, so manifest ordering and
    repeated hashes do not affect the result.

    Returns True when both manifests yield the same set of SHA1 hashes, and
    False otherwise.
    """
    a_hashes = {f.sha1 for f in a.manifest}
    b_hashes = {f.sha1 for f in b.manifest}
    # Compare the hash sets, not the entities: `a == b` would ignore the sets
    # computed above and defeat the purpose of this helper.
    return a_hashes == b_hashes