aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-08-08 17:02:28 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-08-08 17:06:02 -0700
commit 92754a7a12ec56dd958d879ececbc4f19e9623b0 (patch)
tree f00ee9b8d8cdf766debdd0385390118ac9576c76 /python
parent 9d81f6e3f8a4b300c18a831e80880a8e181f812f (diff)
download sandcrawler-92754a7a12ec56dd958d879ececbc4f19e9623b0.tar.gz
sandcrawler-92754a7a12ec56dd958d879ececbc4f19e9623b0.zip
fix tests passing str as HTML
Diffstat (limited to 'python')
-rw-r--r-- python/tests/test_html.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/tests/test_html.py b/python/tests/test_html.py
index 3b59883..7d58a39 100644
--- a/python/tests/test_html.py
+++ b/python/tests/test_html.py
@@ -7,7 +7,7 @@ from sandcrawler.html import extract_fulltext_url
def test_extract_fulltext_url():
- resp = extract_fulltext_url("asdf", "asdf")
+ resp = extract_fulltext_url("asdf", b"asdf")
assert resp == {}
resp = extract_fulltext_url(
@@ -25,14 +25,14 @@ def test_extract_fulltext_url():
assert resp['pdf_url'] == "http://www.example.com/content/271/20/11761.full.pdf"
assert resp['technique'] == "citation_pdf_url"
- with open('tests/files/plos_one_article.html', 'r') as f:
+ with open('tests/files/plos_one_article.html', 'rb') as f:
resp = extract_fulltext_url(
"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0213978",
f.read(),
)
assert resp['pdf_url'] == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable"
- with open('tests/files/elife_article.html', 'r') as f:
+ with open('tests/files/elife_article.html', 'rb') as f:
resp = extract_fulltext_url(
"https://elifesciences.org/articles/44753",
f.read(),