From 515b5ecc6e75aae834958d74883426230532f10d Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 23 Mar 2022 17:55:54 -0700 Subject: ingest fileset fixes, and some test coverage --- python/fatcat_tools/importers/common.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'python/fatcat_tools/importers/common.py') diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index cd51a24c..475cb97a 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -916,3 +916,14 @@ def make_kafka_consumer( ) print("Consuming from kafka topic {}, group {}".format(topic_name, group)) return consumer + + +def filesets_very_similar(a: FilesetEntity, b: FilesetEntity) -> bool: + """ + This helper method checks if two Fileset entities are effectively equivalent: same set of files with comparable hashes. + + Uses a set() of SHA1 hashes to test for equivalence. + """ + a_hashes = set([f.sha1 for f in a.manifest]) + b_hashes = set([f.sha1 for f in b.manifest]) + return a == b -- cgit v1.2.3 From 929f6d1020362a8065d1e0c95d2ee67c88f89b33 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 23 Mar 2022 18:54:42 -0700 Subject: fix typo in fileset comparison helper --- python/fatcat_tools/importers/common.py | 2 +- python/tests/import_ingest.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'python/fatcat_tools/importers/common.py') diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 475cb97a..2136d1da 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -926,4 +926,4 @@ def filesets_very_similar(a: FilesetEntity, b: FilesetEntity) -> bool: """ a_hashes = set([f.sha1 for f in a.manifest]) b_hashes = set([f.sha1 for f in b.manifest]) - return a == b + return a_hashes == b_hashes diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index d86d6d7b..44dd5a0b 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -235,7 +235,6 @@ def test_ingest_fileset_importer(ingest_fileset_importer): counts = JsonLinePusher(ingest_fileset_importer, f).run() assert counts["insert"] == 0 - assert counts["exists"] == 0 - assert counts["skip"] == 20 + assert counts["exists"] == 7 + assert counts["skip"] == 13 assert counts["skip-release-not-found"] == 13 - assert counts["skip-release-has-fileset"] == 7 -- cgit v1.2.3