diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/files/example_crossref_record.json | 225 | ||||
-rw-r--r-- | tests/test_refs_transform.py | 60 | ||||
-rw-r--r-- | tests/test_web.py | 103 |
3 files changed, 385 insertions, 3 deletions
diff --git a/tests/files/example_crossref_record.json b/tests/files/example_crossref_record.json new file mode 100644 index 0000000..d87c7c2 --- /dev/null +++ b/tests/files/example_crossref_record.json @@ -0,0 +1,225 @@ +{ + "doi": "10.1515/jpm-2019-0016", + "record": +{ + "DOI": "10.1111/his.12200", + "ISSN": [ + "0309-0167" + ], + "URL": "http://dx.doi.org/10.1111/his.12200", + "author": [ + { + "affiliation": [], + "family": "Stewart", + "given": "Colin J R" + } + ], + "container-title": [ + "Histopathology" + ], + "content-domain": { + "crossmark-restriction": false, + "domain": [] + }, + "created": { + "date-parts": [ + [ + 2013, + 6, + 3 + ] + ], + "date-time": "2013-06-03T16:37:56Z", + "timestamp": 1370277476000 + }, + "deposited": { + "date-parts": [ + [ + 2017, + 6, + 21 + ] + ], + "date-time": "2017-06-21T14:04:36Z", + "timestamp": 1498053876000 + }, + "indexed": { + "date-parts": [ + [ + 2020, + 7, + 28 + ] + ], + "date-time": "2020-07-28T14:37:55Z", + "timestamp": 1595947075455 + }, + "is-referenced-by-count": 0, + "issn-type": [ + { + "type": "print", + "value": "0309-0167" + } + ], + "issued": { + "date-parts": [ + [ + 2013, + 7 + ] + ] + }, + "license": [ + { + "URL": "http://doi.wiley.com/10.1002/tdm_license_1.1", + "content-version": "tdm", + "delay-in-days": 792, + "start": { + "date-parts": [ + [ + 2015, + 9, + 1 + ] + ], + "date-time": "2015-09-01T00:00:00Z", + "timestamp": 1441065600000 + } + } + ], + "link": [ + { + "URL": "https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1111%2Fhis.12200", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "member": "311", + "original-title": [], + "page": "n/a-n/a", + "prefix": "10.1111", + "published-online": { + "date-parts": [ + [ + 2013, + 7, + 16 + ] + ] + }, + "published-print": { + "date-parts": [ + [ + 2013, + 7 + ] + ] + }, + "publisher": "Wiley", + "reference": [ + { + "DOI": "10.5858/arpa.2012-0112-RA", + "article-title": "The separation of benign and malignant mesothelial proliferations", + "author": "Churg", + "doi-asserted-by": "crossref", + "first-page": "1217", + "journal-title": "Arch. Pathol. Lab. Med.", + "key": "10.1111/his.12200-BIB0001|his12200-cit-0001", + "volume": "136", + "year": "2012" + }, + { + "DOI": "10.1136/jcp.2010.086074", + "article-title": "Peritoneal mesothelial hyperplasia associated with gynaecological disease: a potential diagnostic pitfall that is commonly associated with endometriosis", + "author": "Opraka", + "doi-asserted-by": "crossref", + "first-page": "313", + "journal-title": "J. Clin. Pathol.", + "key": "10.1111/his.12200-BIB0002|his12200-cit-0002", + "volume": "64", + "year": "2011" + }, + { + "DOI": "10.1038/modpathol.2012.105", + "article-title": "Deciduoid mesothelioma: report of 21 cases with review of the literature", + "author": "Ordonez", + "doi-asserted-by": "crossref", + "first-page": "1481", + "journal-title": "Mod. Pathol.", + "key": "10.1111/his.12200-BIB0003|his12200-cit-0003", + "volume": "25", + "year": "2012" + }, + { + "DOI": "10.1111/j.1525-1438.2006.00509.x", + "article-title": "Atypical reactive ovarian surface epithelium, a pitfall in pathologic assessment", + "author": "Aydin", + "doi-asserted-by": "crossref", + "first-page": "207", + "issue": "Suppl. 1", + "journal-title": "Int. J. Gynecol. Cancer", + "key": "10.1111/his.12200-BIB0004|his12200-cit-0004", + "volume": "16", + "year": "2006" + }, + { + "DOI": "10.1097/PAP.0b013e3180ca7d7b", + "article-title": "The pathology of endometriosis: a survey of the many faces of a common disease emphasizing diagnostic pitfalls and unusual and newly appreciated aspects", + "author": "Clement", + "doi-asserted-by": "crossref", + "first-page": "241", + "journal-title": "Adv. Anat. Pathol.", + "key": "10.1111/his.12200-BIB0005|his12200-cit-0005", + "volume": "14", + "year": "2007" + }, + { + "article-title": "Extramedullary hematopoiesis associated with organizing peritoneal hemorrhage: a report of 5 cases in patients presenting with primary gynecological disorders", + "author": "Mesbah Ardakani", + "journal-title": "Int. J. Gynecol. Pathol.", + "key": "10.1111/his.12200-BIB0006|his12200-cit-0006" + }, + { + "key": "10.1016/B0-12-227090-8/00204-9_bib5", + "series-title": "Advances in Laser Remote Sensing – Selected Papers Presented at the 20th International Laser Radar Conference", + "year": "2001" + }, + { + "key": "CIT0041", + "unstructured": "Linda Weiss,Creating Capitalism. Oxford: Blackwell, 1988. 272 pp. £29.95. ISBN 0 631 15733 6." + }, + { + "author": "L Piegl", + "edition": "2", + "key": "576_CR3", + "unstructured": "Piegl L, Tiller W (1997) The NURBS Book, Monographs in Visual Communication, 2nd edn. Springer, Berlin", + "volume-title": "The NURBS Book, Monographs in Visual Communication", + "year": "1997" + } + ], + "reference-count": 6, + "references-count": 6, + "relation": { + "cites": [] + }, + "score": null, + "short-container-title": [ + "Histopathology" + ], + "short-title": [], + "source": "Crossref", + "subject": [ + "Pathology and Forensic Medicine", + "Histology", + "General Medicine" + ], + "subtitle": [], + "title": [ + "Deciduoid mesothelial hyperplasia of the pelvic peritoneum" + ], + "type": "journal-article" +}, + "release_ident": "arzkbn5brjf2nitdy4fkiusc4q" +} + diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py index 3fa490b..078b73b 100644 --- a/tests/test_refs_transform.py +++ b/tests/test_refs_transform.py @@ -1,7 +1,8 @@ +import json from fatcat_openapi_client import ReleaseEntity from fatcat_scholar.grobid2json import teixml2json -from fatcat_scholar.transform import refs_from_grobid +from fatcat_scholar.transform import refs_from_grobid, refs_from_crossref def test_transform_refs_grobid() -> None: @@ -27,7 +28,7 @@ def test_transform_refs_grobid() -> None: assert ref.release_year == 1234 assert ref.ref_source == "grobid" assert ref.key == "b12" - assert ref.index == 12 + assert ref.index == 13 assert ref.locator == None assert ref.biblio.contrib_raw_names is not None assert ref.biblio.contrib_raw_names[0] == "K Tasa" @@ -40,3 +41,58 @@ def test_transform_refs_grobid() -> None: ref.biblio.unstructured == "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19." ) + + +def test_transform_refs_crossref() -> None: + + with open("tests/files/example_crossref_record.json", "r") as f: + record = json.loads(f.read()) + + dummy_release = ReleaseEntity( + ident="releasedummy22222222222222", + work_id="workdummy22222222222222222", + release_year=1234, + release_stage="accepted", + ext_ids={}, + ) + + refs = refs_from_crossref(dummy_release, record) + + assert refs[0].release_ident == "releasedummy22222222222222" + assert refs[0].work_ident == "workdummy22222222222222222" + assert refs[0].release_stage == "accepted" + assert refs[0].release_year == 1234 + assert refs[0].ref_source == "crossref" + assert refs[0].key == "BIB0001|his12200-cit-0001" + assert refs[0].index == 1 + assert refs[0].locator is None + assert refs[0].biblio.contrib_raw_names is not None + assert refs[0].biblio.contrib_raw_names[0] == "Churg" + assert refs[0].biblio.container_name == "Arch. Pathol. Lab. Med." + assert ( + refs[0].biblio.title + == "The separation of benign and malignant mesothelial proliferations" + ) + assert refs[0].biblio.year == 2012 + assert refs[0].biblio.pages == "1217" + assert refs[0].biblio.volume == "136" + assert refs[0].biblio.doi == "10.5858/arpa.2012-0112-ra" + assert refs[0].biblio.unstructured is None + + assert ( + refs[6].biblio.title + == "Advances in Laser Remote Sensing – Selected Papers Presented at the 20th International Laser Radar Conference" + ) + assert refs[6].biblio.year == 2001 + + assert refs[7].key == "CIT0041" + assert ( + refs[7].biblio.unstructured + == "Linda Weiss,Creating Capitalism. Oxford: Blackwell, 1988. 272 pp. £29.95. ISBN 0 631 15733 6." + ) + + assert refs[8].key == "576_CR3" + assert refs[8].biblio.unstructured is not None + assert refs[8].biblio.title == "The NURBS Book, Monographs in Visual Communication" + assert refs[8].biblio.year == 1997 + assert refs[8].biblio.version == "2" diff --git a/tests/test_web.py b/tests/test_web.py index 7f1f72a..d9cfab6 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -3,6 +3,7 @@ from typing import Any import pytest from fastapi.testclient import TestClient +import fatcat_openapi_client from fatcat_scholar.web import app @@ -148,7 +149,11 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None: == "https://web.archive.org/web/20200206164725id_/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf" ) - # check that URL is validated + # check that URL is validated (force fatcat API fallback to fail) + fatcat_api_raw = mocker.patch("fatcat_openapi_client.ApiClient.call_api") + fatcat_api_raw.side_effect = [ + fatcat_openapi_client.ApiException(status=404, reason="dummy") + ] rv = client.get( "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY", allow_redirects=False, @@ -156,6 +161,102 @@ def test_basic_access_redirect(client: Any, mocker: Any) -> None: assert rv.status_code == 404 +def test_access_redirect_fallback(client: Any, mocker: Any) -> None: + + with open("tests/files/elastic_fulltext_get.json") as f: + elastic_resp = json.loads(f.read()) + + es_raw = mocker.patch( + "elasticsearch.connection.Urllib3HttpConnection.perform_request" + ) + es_raw.side_effect = [ + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + (200, {}, json.dumps(elastic_resp)), + ] + fatcat_get_work_raw = mocker.patch("fatcat_openapi_client.DefaultApi.get_work") + fatcat_get_work_raw.side_effect = [ + fatcat_openapi_client.WorkEntity( + state="active", ident="wwwwwwwwwwwwwwwwwwwwwwwwww", + ) + ] * 4 + fatcat_get_work_releases_raw = mocker.patch( + "fatcat_openapi_client.DefaultApi.get_work_releases" + ) + fatcat_get_work_releases_raw.side_effect = [ + [ + fatcat_openapi_client.ReleaseEntity( + ident="rrrrrrrrrrrrrrrrrrrrrrrrrr", + ext_ids=fatcat_openapi_client.ReleaseExtIds(), + ), + ] + ] * 4 + fatcat_get_release_raw = mocker.patch( + "fatcat_openapi_client.DefaultApi.get_release" + ) + fatcat_get_release_raw.side_effect = [ + fatcat_openapi_client.ReleaseEntity( + state="active", + ident="rrrrrrrrrrrrrrrrrrrrrrrrrr", + ext_ids=fatcat_openapi_client.ReleaseExtIds(), + files=[ + fatcat_openapi_client.FileEntity( + ident="ffffffffffffffffffffffffff", + urls=[ + fatcat_openapi_client.FileUrl( + rel="web", url="https://blarg.example.com", + ), + fatcat_openapi_client.FileUrl( + rel="webarchive", + url="https://web.archive.org/web/12345/https://example.com", + ), + fatcat_openapi_client.FileUrl( + rel="archive", + url="https://archive.org/download/some/thing.pdf", + ), + ], + ), + ], + ) + ] * 4 + + # redirects should work after API lookup, for both wayback and archive.org + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://example.com", + allow_redirects=False, + ) + assert rv.status_code == 302 + assert ( + rv.headers["Location"] + == "https://web.archive.org/web/12345id_/https://example.com" + ) + + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.pdf", + allow_redirects=False, + ) + assert rv.status_code == 302 + assert rv.headers["Location"] == "https://archive.org/download/some/thing.pdf" + + # wrong URLs should still not work, but display a page with helpful links + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/wayback/https://www.federalreserve.gov/econresdata/feds/2015/files/2015118pap.pdf.DUMMY", + allow_redirects=False, + ) + assert rv.status_code == 404 + assert b"Access Location Not Found" in rv.content + assert b"web.archive.org/web/*/https://www.federalreserve.gov" in rv.content + + rv = client.get( + "/work/2x5qvct2dnhrbctqa2q2uyut6a/access/ia_file/some/thing.else.pdf", + allow_redirects=False, + ) + assert rv.status_code == 404 + assert b"Access Location Not Found" in rv.content + assert b"archive.org/download/some/thing.else.pdf" in rv.content + + def test_access_redirect_encoding(client: Any, mocker: Any) -> None: with open("tests/files/elastic_get_work_a6gvpil4brdgzhqyaog3ftngqe.json") as f: |