about summary refs log tree commit diff stats
path: root/python/tests/merge_files.py
diff options
context:
space:
mode:
author bnewbold <bnewbold@archive.org> 2021-11-25 00:36:34 +0000
committer bnewbold <bnewbold@archive.org> 2021-11-25 00:36:34 +0000
commit 5bc5eeed5e3ba54c2129c4233b881291c5fa7449 (patch)
tree 88392dace6857836cab80f3ea6d0980c20a24376 /python/tests/merge_files.py
parent b14fca89f41b5ba2b85bf033844da211fa5c3c8b (diff)
parent 62bf6202d002e96ffd81d7e4634502be1886d5c3 (diff)
download fatcat-5bc5eeed5e3ba54c2129c4233b881291c5fa7449.tar.gz
fatcat-5bc5eeed5e3ba54c2129c4233b881291c5fa7449.zip
Merge branch 'bnewbold-mergers' into 'master'
entity mergers framework See merge request webgroup/fatcat!133
Diffstat (limited to 'python/tests/merge_files.py')
-rw-r--r-- python/tests/merge_files.py 160
1 files changed, 160 insertions, 0 deletions
diff --git a/python/tests/merge_files.py b/python/tests/merge_files.py
new file mode 100644
index 00000000..c33c6f6c
--- /dev/null
+++ b/python/tests/merge_files.py
@@ -0,0 +1,160 @@
+from fatcat_openapi_client import FileEntity, FileUrl
+from fixtures import api
+
+from fatcat_tools.mergers.files import FileMerger
+
+
+def test_choose_primary_file(api) -> None:
+
+ fm = FileMerger(api=api)
+ fe_partial = FileEntity(
+ ident="aaaasb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3aaaaaa",
+ )
+ fe_norelease = FileEntity(
+ ident="bbbbsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="d2c7318315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ )
+ fe_nourls = FileEntity(
+ ident="ccccsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="d2c7318315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ )
+ fe_complete = FileEntity(
+ ident="ddddsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="d2c7318315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ urls=[
+ FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"),
+ ],
+ extra=dict(asdf=123),
+ )
+ fe_pseudo_complete = FileEntity(
+ ident="eeeesb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ urls=[
+ FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"),
+ ],
+ extra=dict(asdf=123),
+ )
+
+ assert fm.choose_primary_file([fe_partial, fe_norelease]) == "bbbbsb5apzfhbbxxc7rgu2yw6m"
+ assert (
+ fm.choose_primary_file([fe_partial, fe_nourls, fe_norelease])
+ == "ccccsb5apzfhbbxxc7rgu2yw6m"
+ )
+ assert (
+ fm.choose_primary_file([fe_partial, fe_complete, fe_nourls, fe_norelease])
+ == "ddddsb5apzfhbbxxc7rgu2yw6m"
+ )
+ assert (
+ fm.choose_primary_file([fe_partial, fe_pseudo_complete, fe_nourls, fe_norelease])
+ == "ccccsb5apzfhbbxxc7rgu2yw6m"
+ )
+
+
+def test_merge_file_metadata_from(api) -> None:
+ fm = FileMerger(api=api)
+ fe_partial = FileEntity(
+ ident="aaaasb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3aaaaaa",
+ )
+ fe_norelease = FileEntity(
+ ident="bbbbsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="d2c7318315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ )
+ fe_nourls = FileEntity(
+ ident="ccccsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="d2c7318315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ )
+ fe_complete = FileEntity(
+ ident="ddddsb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ md5="ddddddd315bfc7d3aab0db933e95e632",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ urls=[
+ FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"),
+ ],
+ extra=dict(asdf=123),
+ )
+ fe_pseudo_complete = FileEntity(
+ ident="eeeesb5apzfhbbxxc7rgu2yw6m",
+ sha1="b1beebb5f979121cd234c69b08e3f42af3bbbbbb",
+ sha256="528064c7664a96c79c80c423210f6f9f4fafe949dd59dfd1572a04b906d5e163",
+ size=60719,
+ mimetype="application/pdf",
+ release_ids=["dlrxjg7mxrayxfltget7fqcrjy"],
+ urls=[
+ FileUrl(rel="web", url="http://aughty.org/pdf/future_open.pdf"),
+ ],
+ extra=dict(asdf=123),
+ )
+ fe_another_release_id = FileEntity(
+ ident="fffffffapzfhbbxxc7rgu2yw6m",
+ release_ids=["qqqqqg7mxrayxfltget7fqcrjy"],
+ )
+ fe_another_url = FileEntity(
+ ident="zzzzzzzapzfhbbxxc7rgu2yw6m",
+ urls=[
+ FileUrl(rel="repository", url="http://someuni.edu/repo/file.pdf"),
+ ],
+ )
+ fe_more_extra = FileEntity(
+ ident="fffffffapzfhbbxxc7rgu2yw6m",
+ release_ids=["qqqqqg7mxrayxfltget7fqcrjy"],
+ extra=dict(thang=456),
+ )
+
+ assert fm.merge_file_metadata_from(fe_nourls, fe_partial) is False
+ assert fm.merge_file_metadata_from(fe_complete, fe_pseudo_complete) is False
+ assert fm.merge_file_metadata_from(fe_complete, fe_complete) is False
+ assert fm.merge_file_metadata_from(fe_partial, fe_norelease) is True
+ assert fe_partial.md5 == fe_norelease.md5
+ assert fe_partial.size == fe_norelease.size
+ assert fm.merge_file_metadata_from(fe_partial, fe_complete) is True
+ assert fe_partial.md5 != fe_complete.md5
+ assert fe_partial.extra == fe_complete.extra
+ assert set([(u.rel, u.url) for u in fe_partial.urls or []]) == set(
+ [(u.rel, u.url) for u in fe_complete.urls or []]
+ )
+ assert fe_partial.release_ids == fe_complete.release_ids
+ assert fm.merge_file_metadata_from(fe_partial, fe_another_release_id) is True
+ assert fe_partial.release_ids == [
+ "dlrxjg7mxrayxfltget7fqcrjy",
+ "qqqqqg7mxrayxfltget7fqcrjy",
+ ]
+ assert fm.merge_file_metadata_from(fe_partial, fe_another_release_id) is False
+ assert fm.merge_file_metadata_from(fe_partial, fe_more_extra) is True
+ assert fe_partial.extra == dict(asdf=123, thang=456)
+ assert fm.merge_file_metadata_from(fe_partial, fe_more_extra) is False
+ assert fm.merge_file_metadata_from(fe_partial, fe_another_url) is True
+ assert fe_partial.urls[-1].url == "http://someuni.edu/repo/file.pdf"
+ assert fm.merge_file_metadata_from(fe_partial, fe_another_url) is False