diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 21:54:24 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 21:54:24 -0800 |
commit | a68aadc4107fc68dc2748c52dab8a4bd92cca022 (patch) | |
tree | da3da0a847d5c10dee873e8bce8198a39c12ce1f /python/tests/test_html.py | |
parent | 6a701f966b8bc760bf904c0569562b0159e13559 (diff) | |
download | sandcrawler-a68aadc4107fc68dc2748c52dab8a4bd92cca022.tar.gz sandcrawler-a68aadc4107fc68dc2748c52dab8a4bd92cca022.zip |
move some PDF URL extraction into declarative format
Diffstat (limited to 'python/tests/test_html.py')
-rw-r--r-- | python/tests/test_html.py | 8 |
1 files changed, 0 insertions, 8 deletions
```diff
diff --git a/python/tests/test_html.py b/python/tests/test_html.py
index 7d58a39..9a81852 100644
--- a/python/tests/test_html.py
+++ b/python/tests/test_html.py
@@ -31,11 +31,3 @@ def test_extract_fulltext_url():
             f.read(),
         )
     assert resp['pdf_url'] == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable"
-
-    with open('tests/files/elife_article.html', 'rb') as f:
-        resp = extract_fulltext_url(
-            "https://elifesciences.org/articles/44753",
-            f.read(),
-        )
-    assert resp['pdf_url'] == "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDQ3NTMvZWxpZmUtNDQ3NTMtdjIucGRm/elife-44753-v2.pdf?_hash=CfyqOqVryCR4OjcMTfcdpeIWAGZznmh9jXksYKYChCw%3D"
-
```