"""Tests for fuzzycat.grobid_unstructured and grobid_tei_xml legacy output."""

import pytest
from grobid_tei_xml import parse_citation_xml
from grobid_tei_xml.types import GrobidBiblio, GrobidAuthor

from fuzzycat.grobid_unstructured import (
    grobid_api_process_citation,
    grobid_parse_unstructured,
    grobid_ref_to_release,
)


def test_grobid_ref_to_release():
    """
    Title, DOI, journal and author names of a GrobidBiblio must show up on the
    release returned by grobid_ref_to_release.
    """
    d = GrobidBiblio(
        title="some title",
        doi='10.1234/5678',
        journal='some journal',
        authors=[
            GrobidAuthor(
                full_name='ahab sailor',
                given_name='ahab',
                surname='sailor',
            ),
            GrobidAuthor(full_name='mary jane', given_name='mary', surname='jane'),
        ],
    )
    r = grobid_ref_to_release(d)
    assert r.title == d.title
    assert r.ext_ids.doi == d.doi
    # The journal name is expected under extra['container_name'].
    assert r.extra['container_name'] == d.journal
    assert r.contribs[0].surname == d.authors[0].surname
    assert r.contribs[1].raw_name == d.authors[1].full_name


def test_transform_grobid_ref_xml():
    """
    This used to be a test of the grobid2json file in this repository. Now it
    is a backwards compatibility test for grobid_tei_xml
    """
    # NOTE(review): the TEI markup of this fixture was reconstructed from the
    # surviving text nodes and from the assertions below (date and page range
    # are carried in attributes) -- confirm against a real GROBID
    # processCitation response.
    citation_xml = """
<biblStruct>
    <analytic>
        <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title>
        <author>
            <persName>
                <forename type="first">H</forename>
                <forename type="middle">B</forename>
                <surname>Cunningham</surname>
            </persName>
        </author>
        <author>
            <persName>
                <forename type="first">J</forename>
                <forename type="middle">J</forename>
                <surname>Weis</surname>
            </persName>
        </author>
        <author>
            <persName>
                <forename type="first">L</forename>
                <forename type="middle">R</forename>
                <surname>Taveras</surname>
            </persName>
        </author>
        <author>
            <persName>
                <forename type="first">S</forename>
                <surname>Huerta</surname>
            </persName>
        </author>
        <idno type="DOI">10.1007/s10029-019-01898-9</idno>
        <idno type="PMID">30701369</idno>
    </analytic>
    <monogr>
        <title level="j">Hernia</title>
        <imprint>
            <biblScope unit="volume">23</biblScope>
            <biblScope unit="issue">2</biblScope>
            <biblScope unit="page" from="235" to="243" />
            <date type="published" when="2019-01-30" />
        </imprint>
    </monogr>
</biblStruct>"""

    citation = parse_citation_xml(citation_xml)
    assert citation
    d = citation.to_legacy_dict()

    assert d['title'] == "Mesh migration following abdominal hernia repair: a comprehensive review"
    assert d['authors'][2]['given_name'] == "L"
    assert d['authors'][2]['surname'] == "Taveras"
    assert d['authors'][2]['name'] == "L R Taveras"
    assert d['doi'] == "10.1007/s10029-019-01898-9"
    assert d['pmid'] == "30701369"
    assert d['date'] == "2019-01-30"
    assert d['pages'] == "235-243"
    assert d['volume'] == "23"
    assert d['issue'] == "2"
    assert d['journal'] == "Hernia"


def test_grobid_parse_unstructured():
    """
    NOTE: this test makes live network requests to GROBID
    """
    # A string that is not a citation is expected to yield no release at all.
    r = grobid_parse_unstructured("blah")
    assert r is None

    # Adjacent literals keep the raw citation (including its embedded line
    # break) identical to the original input while avoiding trailing
    # whitespace in the source file.
    raw_citation = (
        "Cunningham HB, Weis JJ, Taveras LR, Huerta S. Mesh migration following "
        "abdominal hernia repair: a comprehensive review. Hernia. \n"
        "2019 Apr;23(2):235-243. doi: 10.1007/s10029-019-01898-9. "
        "Epub 2019 Jan 30. PMID: 30701369."
    )
    r = grobid_parse_unstructured(raw_citation)
    # Fail with a clear assertion instead of an AttributeError if parsing
    # returned nothing.
    assert r is not None
    assert r.title == "Mesh migration following abdominal hernia repair: a comprehensive review"
    assert r.contribs[0].surname == "Cunningham"
    assert r.contribs[1].surname == "Weis"
    assert r.contribs[2].surname == "Taveras"
    assert r.contribs[3].surname == "Huerta"
    assert r.extra['container_name'] == "Hernia"
    assert r.release_year == 2019
    assert r.volume == "23"
    assert r.issue == "2"
    assert r.pages == "235-243"
    assert r.ext_ids.doi == "10.1007/s10029-019-01898-9"
    assert r.ext_ids.pmid == "30701369"


def test_grobid_parse_unstructured_timeout():
    """
    NOTE: this test makes live network requests to GROBID
    """
    # A one-microsecond timeout is expected to surface as TimeoutError.
    with pytest.raises(TimeoutError):
        grobid_parse_unstructured("blah", timeout=0.000001)