diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:13:14 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 18:13:14 -0700 |
commit | cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7 (patch) | |
tree | 5e4034027b51f3ee4d2a488bb2cbb7a75c3bd0d8 /python/tests/clean_files.py | |
parent | 78f08280edea4ff65ca613ad30005c45cc48dea6 (diff) | |
download | fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.tar.gz fatcat-cdfd6b85b386b7bbf9d5a5179ef26970b6e5a4e7.zip |
fmt (black): tests/
Diffstat (limited to 'python/tests/clean_files.py')
-rw-r--r-- | python/tests/clean_files.py | 43 |
1 files changed, 32 insertions, 11 deletions
```diff
diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py
index c8b935bb..34a00ebb 100644
--- a/python/tests/clean_files.py
+++ b/python/tests/clean_files.py
@@ -1,4 +1,3 @@
-
 import copy
 import pytest
 
@@ -12,6 +11,7 @@ from fatcat_tools.cleanups import FileCleaner
 def file_cleaner(api):
     yield FileCleaner(api)
 
+
 def test_url_cleanups(file_cleaner):
 
     f = FileEntity(
@@ -20,41 +20,62 @@ def test_url_cleanups(file_cleaner):
     )
 
     f.urls = [
-        FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
-        FileUrl(url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"),
-        FileUrl(url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"),
+        FileUrl(
+            url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"
+        ),
+        FileUrl(
+            url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"
+        ),
+        FileUrl(
+            url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"
+        ),
     ]
     f = file_cleaner.clean_entity(f)
 
     # remove None wayback links
     assert len(f.urls) == 2
     for u in f.urls:
-        assert 'web/None' not in u.url
+        assert "web/None" not in u.url
 
     assert f == file_cleaner.clean_entity(f)
     assert f == file_cleaner.clean_entity(copy.deepcopy(f))
 
     # rel=repository -> rel=archive for archive.org links
-    assert f.urls[1].rel == 'archive'
+    assert f.urls[1].rel == "archive"
 
     # short wayback dates
     f.urls = [
-        FileUrl(url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
-        FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+        FileUrl(
+            url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+            rel="webarchive",
+        ),
+        FileUrl(
+            url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+            rel="webarchive",
+        ),
     ]
     f = file_cleaner.clean_entity(f)
     assert len(f.urls) == 1
-    assert f.urls[0].url == 'http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+    assert (
+        f.urls[0].url
+        == "http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+    )
 
     assert f == file_cleaner.clean_entity(f)
     assert f == file_cleaner.clean_entity(copy.deepcopy(f))
 
     f.urls = [
-        FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+        FileUrl(
+            url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+            rel="webarchive",
+        ),
     ]
     f = file_cleaner.clean_entity(f)
     assert len(f.urls) == 1
-    assert f.urls[0].url == 'http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+    assert (
+        f.urls[0].url
+        == "http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+    )
 
     assert f == file_cleaner.clean_entity(f)
     assert f == file_cleaner.clean_entity(copy.deepcopy(f))
```