diff options
author | bnewbold <bnewbold@archive.org> | 2021-11-25 00:36:34 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2021-11-25 00:36:34 +0000 |
commit | 5bc5eeed5e3ba54c2129c4233b881291c5fa7449 (patch) | |
tree | 88392dace6857836cab80f3ea6d0980c20a24376 /python/tests/merge_files.py | |
parent | b14fca89f41b5ba2b85bf033844da211fa5c3c8b (diff) | |
parent | 62bf6202d002e96ffd81d7e4634502be1886d5c3 (diff) | |
download | fatcat-5bc5eeed5e3ba54c2129c4233b881291c5fa7449.tar.gz fatcat-5bc5eeed5e3ba54c2129c4233b881291c5fa7449.zip |
Merge branch 'bnewbold-mergers' into 'master'
entity mergers framework
See merge request webgroup/fatcat!133
Diffstat (limited to 'python/tests/merge_files.py')
-rw-r--r-- | python/tests/merge_files.py | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/python/tests/merge_files.py b/python/tests/merge_files.py new file mode 100644 index 00000000..c33c6f6c --- /dev/null +++ b/python/tests/merge_files.py @@ -0,0 +1,160 @@ +from fatcat_openapi_client import FileEntity, FileUrl +from fixtures import api + +from fatcat_tools.mergers.files import FileMerger + + +def test_choose_primary_file(api) -> None: + + fm = FileMerger(api=api) + fe_partial = FileEntity( + ident="aaaasb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3aaaaaa", + ) + fe_norelease = FileEntity( + ident="bbbbsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="d2c7318315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + ) + fe_nourls = FileEntity( + ident="ccccsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="d2c7318315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + ) + fe_complete = FileEntity( + ident="ddddsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="d2c7318315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + urls=[ + FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"), + ], + extra=dict(asdf=123), + ) + fe_pseudo_complete = FileEntity( + ident="eeeesb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + urls=[ + FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"), + ], + extra=dict(asdf=123), + ) + + assert fm.choose_primary_file([fe_partial, fe_norelease]) == "bbbbsb5apzfhbbxxc7rgu2yw6m" + assert ( + fm.choose_primary_file([fe_partial, fe_nourls, fe_norelease]) + == "ccccsb5apzfhbbxxc7rgu2yw6m" + ) + assert ( + fm.choose_primary_file([fe_partial, fe_complete, fe_nourls, fe_norelease]) + == "ddddsb5apzfhbbxxc7rgu2yw6m" + ) + assert ( + fm.choose_primary_file([fe_partial, fe_pseudo_complete, fe_nourls, fe_norelease]) + == "ccccsb5apzfhbbxxc7rgu2yw6m" + ) + + +def test_merge_file_metadata_from(api) -> None: + fm = FileMerger(api=api) + fe_partial = FileEntity( + ident="aaaasb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3aaaaaa", + ) + fe_norelease = FileEntity( + ident="bbbbsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="d2c7318315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + ) + fe_nourls = FileEntity( + ident="ccccsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="d2c7318315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + ) + fe_complete = FileEntity( + ident="ddddsb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + md5="ddddddd315bfc7d3aab0db933e95e632", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + urls=[ + FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"), + ], + extra=dict(asdf=123), + ) + fe_pseudo_complete = FileEntity( + ident="eeeesb5apzfhbbxxc7rgu2yw6m", + sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb", + sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163", + size=60719, + mimetype="application/pdf", + release_ids=["dlrxjg7mxrayxfltget7fqcrjy"], + urls=[ + FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"), + ], + extra=dict(asdf=123), + ) + fe_another_release_id = FileEntity( + ident="fffffffapzfhbbxxc7rgu2yw6m", + release_ids=["qqqqqg7mxrayxfltget7fqcrjy"], + ) + fe_another_url = FileEntity( + ident="zzzzzzzapzfhbbxxc7rgu2yw6m", + urls=[ + FileUrl(rel="repository", url="http://someuni.edu/repo/file.pdf"), + ], + ) + fe_more_extra = FileEntity( + ident="fffffffapzfhbbxxc7rgu2yw6m", + release_ids=["qqqqqg7mxrayxfltget7fqcrjy"], + extra=dict(thang=456), + ) + + assert fm.merge_file_metadata_from(fe_nourls, fe_partial) is False + assert fm.merge_file_metadata_from(fe_complete, fe_pseudo_complete) is False + assert fm.merge_file_metadata_from(fe_complete, fe_complete) is False + assert fm.merge_file_metadata_from(fe_partial, fe_norelease) is True + assert fe_partial.md5 == fe_norelease.md5 + assert fe_partial.size == fe_norelease.size + assert fm.merge_file_metadata_from(fe_partial, fe_complete) is True + assert fe_partial.md5 != fe_complete.md5 + assert fe_partial.extra == fe_complete.extra + assert set([(u.rel, u.url) for u in fe_partial.urls or []]) == set( + [(u.rel, u.url) for u in fe_complete.urls or []] + ) + assert fe_partial.release_ids == fe_complete.release_ids + assert fm.merge_file_metadata_from(fe_partial, fe_another_release_id) is True + assert fe_partial.release_ids == [ + "dlrxjg7mxrayxfltget7fqcrjy", + "qqqqqg7mxrayxfltget7fqcrjy", + ] + assert fm.merge_file_metadata_from(fe_partial, fe_another_release_id) is False + assert fm.merge_file_metadata_from(fe_partial, fe_more_extra) is True + assert fe_partial.extra == dict(asdf=123, thang=456) + assert fm.merge_file_metadata_from(fe_partial, fe_more_extra) is False + assert fm.merge_file_metadata_from(fe_partial, fe_another_url) is True + assert fe_partial.urls[-1].url == "http://someuni.edu/repo/file.pdf" + assert fm.merge_file_metadata_from(fe_partial, fe_another_url) is False |