about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests/clean_files.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/tests/clean_files.py')
-rw-r--r-- python/tests/clean_files.py | 43
1 files changed, 32 insertions, 11 deletions
diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py
index c8b935bb..34a00ebb 100644
--- a/python/tests/clean_files.py
+++ b/python/tests/clean_files.py
@@ -1,4 +1,3 @@
-
import copy
import pytest
@@ -12,6 +11,7 @@ from fatcat_tools.cleanups import FileCleaner
def file_cleaner(api):
yield FileCleaner(api)
+
def test_url_cleanups(file_cleaner):
f = FileEntity(
@@ -20,41 +20,62 @@ def test_url_cleanups(file_cleaner):
)
f.urls = [
- FileUrl(url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"),
- FileUrl(url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"),
- FileUrl(url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"),
+ FileUrl(
+ url="https://web.archive.org/web/12345542/something.com/blah.pdf", rel="webarchive"
+ ),
+ FileUrl(
+ url="https://web.archive.org/web/None/something.com/blah.pdf", rel="webarchive"
+ ),
+ FileUrl(
+ url="https://archive.org/details/None/something.com/blah.pdf", rel="repository"
+ ),
]
f = file_cleaner.clean_entity(f)
# remove None wayback links
assert len(f.urls) == 2
for u in f.urls:
- assert 'web/None' not in u.url
+ assert "web/None" not in u.url
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))
# rel=repository -> rel=archive for archive.org links
- assert f.urls[1].rel == 'archive'
+ assert f.urls[1].rel == "archive"
# short wayback dates
f.urls = [
- FileUrl(url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
- FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+ FileUrl(
+ url="http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
+ FileUrl(
+ url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
]
f = file_cleaner.clean_entity(f)
assert len(f.urls) == 1
- assert f.urls[0].url == 'http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+ assert (
+ f.urls[0].url
+ == "http://web.archive.org/web/20181031120933/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+ )
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))
f.urls = [
- FileUrl(url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf", rel="webarchive"),
+ FileUrl(
+ url="http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf",
+ rel="webarchive",
+ ),
]
f = file_cleaner.clean_entity(f)
assert len(f.urls) == 1
- assert f.urls[0].url == 'http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf'
+ assert (
+ f.urls[0].url
+ == "http://web.archive.org/web/2018/https://www.jstage.jst.go.jp/article/jsci1978/1/1/1_1_231/_pdf"
+ )
assert f == file_cleaner.clean_entity(f)
assert f == file_cleaner.clean_entity(copy.deepcopy(f))