From a68aadc4107fc68dc2748c52dab8a4bd92cca022 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sun, 8 Nov 2020 21:54:24 -0800 Subject: move some PDF URL extraction into declarative format --- python/tests/test_html.py | 8 -------- python/tests/test_html_metadata.py | 4 +++- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'python/tests') diff --git a/python/tests/test_html.py b/python/tests/test_html.py index 7d58a39..9a81852 100644 --- a/python/tests/test_html.py +++ b/python/tests/test_html.py @@ -31,11 +31,3 @@ def test_extract_fulltext_url(): f.read(), ) assert resp['pdf_url'] == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable" - - with open('tests/files/elife_article.html', 'rb') as f: - resp = extract_fulltext_url( - "https://elifesciences.org/articles/44753", - f.read(), - ) - assert resp['pdf_url'] == "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDQ3NTMvZWxpZmUtNDQ3NTMtdjIucGRm/elife-44753-v2.pdf?_hash=CfyqOqVryCR4OjcMTfcdpeIWAGZznmh9jXksYKYChCw%3D" - diff --git a/python/tests/test_html_metadata.py b/python/tests/test_html_metadata.py index b428b0d..bf26a98 100644 --- a/python/tests/test_html_metadata.py +++ b/python/tests/test_html_metadata.py @@ -39,6 +39,7 @@ def test_html_metadata_plos() -> None: assert meta.publisher == "Public Library of Science" assert meta.raw_references and "citation_title=Reticuloendotheliosis virus sequences within the genomes of field strains of fowlpox virus display variability;citation_author=P Singh;citation_author=W. M. Schnitzlein;citation_author=D. N. Tripathy;citation_journal_title=J. Virol;citation_volume=77;citation_number=77;citation_first_page=5855;citation_last_page=5862;citation_publication_date=2003;" in meta.raw_references assert meta.release_type == "article-journal" + assert meta.pdf_fulltext_url == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable" def test_html_metadata_elife() -> None: @@ -46,7 +47,7 @@ def test_html_metadata_elife() -> None: with open('tests/files/elife_article.html', 'r') as f: elife_html = f.read() - meta = html_extract_biblio("http://example.org", HTMLParser(elife_html)) + meta = html_extract_biblio("https://elifesciences.org/articles/44753", HTMLParser(elife_html)) assert meta is not None assert meta.title == "Parallel visual circuitry in a basal chordate" assert meta.doi == "10.7554/eLife.44753" @@ -63,6 +64,7 @@ def test_html_metadata_elife() -> None: # 2019-04-18 assert meta.release_date == datetime.date(year=2019, month=4, day=18) assert meta.publisher == "eLife Sciences Publications Limited" + assert meta.pdf_fulltext_url == "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDQ3NTMvZWxpZmUtNDQ3NTMtdjIucGRm/elife-44753-v2.pdf?_hash=CfyqOqVryCR4OjcMTfcdpeIWAGZznmh9jXksYKYChCw%3D" def test_html_metadata_peerj() -> None: -- cgit v1.2.3