aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/common.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2022-04-07 14:44:01 -0700
committerBryan Newbold <bnewbold@robocracy.org>2022-04-07 14:44:01 -0700
commitede98644a89afd15d903061e0998dbd08851df6d (patch)
tree17c54c5764adb2f5d67aa750174f635e0fb1cdc8 /python/fatcat_tools/importers/common.py
parent2ef72e0c769e94401568ab42def30ddb5268fa98 (diff)
parent0aaa2a839d7a14716ee1a84b730203a7953dc5e0 (diff)
downloadfatcat-ede98644a89afd15d903061e0998dbd08851df6d.tar.gz
fatcat-ede98644a89afd15d903061e0998dbd08851df6d.zip
Merge branch 'bnewbold-dataset-ingest-fixes'
Diffstat (limited to 'python/fatcat_tools/importers/common.py')
-rw-r--r--python/fatcat_tools/importers/common.py11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index cd51a24c..2136d1da 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -916,3 +916,14 @@ def make_kafka_consumer(
)
print("Consuming from kafka topic {}, group {}".format(topic_name, group))
return consumer
+
+
def filesets_very_similar(a: FilesetEntity, b: FilesetEntity) -> bool:
    """
    Check whether two Fileset entities are effectively equivalent: the same
    set of files, judged by their hashes.

    Only the SHA1 of each manifest entry is compared, and the comparison is
    between unordered sets. File order and duplicate entries therefore do not
    matter, and no other attribute of a manifest entry is consulted. Entries
    without a SHA1 contribute None to the set.

    A manifest that is unset (None) is treated the same as an empty one, so
    two filesets that both list no files compare as similar.

    Returns True when both manifests yield the same set of SHA1 values.
    """
    # `or ()` guards against an unset manifest, which would otherwise raise
    # TypeError on iteration.
    a_hashes = {f.sha1 for f in a.manifest or ()}
    b_hashes = {f.sha1 for f in b.manifest or ()}
    return a_hashes == b_hashes