From ab8f4b0f957fa020f94fbb373e4d41f3cbb94293 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sat, 16 Jul 2022 13:07:38 -0700 Subject: HTML: no longer extracting citation_pdf_url in main extract function --- python/tests/test_html.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/python/tests/test_html.py b/python/tests/test_html.py index 614b802..0f951eb 100644 --- a/python/tests/test_html.py +++ b/python/tests/test_html.py @@ -6,27 +6,3 @@ def test_extract_fulltext_url(): resp = extract_fulltext_url("asdf", b"asdf") assert resp == {} - resp = extract_fulltext_url( - "http://dummy-site/", - b""" - - - - -

my big article here

- blah - - """, - ) - assert resp["pdf_url"] == "http://www.example.com/content/271/20/11761.full.pdf" - assert resp["technique"] == "citation_pdf_url" - - with open("tests/files/plos_one_article.html", "rb") as f: - resp = extract_fulltext_url( - "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0213978", - f.read(), - ) - assert ( - resp["pdf_url"] - == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable" - ) -- cgit v1.2.3