diff options
author | Martin Czygan <martin@archive.org> | 2021-04-15 14:11:09 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2021-04-15 14:11:09 +0000 |
commit | b27c43071ab021e9595457999359009cfd7a1abb (patch) | |
tree | e00199889528c00f777f5bbc908d0962760fb96f /tests | |
parent | 8a17311c9516e63aeb31111647fdf21083bcf928 (diff) | |
parent | d44a9e421edfec2cac16048b67e6809cae8cdd18 (diff) | |
download | fuzzycat-b27c43071ab021e9595457999359009cfd7a1abb.tar.gz fuzzycat-b27c43071ab021e9595457999359009cfd7a1abb.zip |
Merge branch 'bnewbold-upstreaming' into 'master'
refactoring/upstreaming fuzzycat "live" matching helpers
See merge request webgroup/fuzzycat!2
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_grobid_unstructured.py | 130 |
-rw-r--r-- | tests/test_simple.py | 42 |
2 files changed, 172 insertions, 0 deletions
diff --git a/tests/test_grobid_unstructured.py b/tests/test_grobid_unstructured.py new file mode 100644 index 0000000..dd69936 --- /dev/null +++ b/tests/test_grobid_unstructured.py @@ -0,0 +1,130 @@ +import pytest + +from fuzzycat.grobid_unstructured import grobid_api_process_citation, grobid_parse_unstructured, grobid_ref_to_release, transform_grobid_ref_xml + + +def test_grobid_ref_to_release(): + + d = { + 'title': + "some title", + 'doi': + '10.1234/5678', + 'journal': + 'some journal', + 'authors': [ + { + 'name': 'ahab sailor', + 'given_name': 'ahab', + 'surname': 'sailor' + }, + { + 'name': 'mary jane', + 'given_name': 'mary', + 'surname': 'jane' + }, + ], + } + r = grobid_ref_to_release(d) + assert r.title == d['title'] + assert r.ext_ids.doi == d['doi'] + assert r.extra['container_name'] == d['journal'] + assert r.contribs[0].surname == d['authors'][0]['surname'] + assert r.contribs[1].raw_name == d['authors'][1]['name'] + + +def test_transform_grobid_ref_xml(): + citation_xml = """ +<biblStruct > + <analytic> + <title level="a" type="main">Mesh migration following abdominal hernia repair: a comprehensive review</title> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">H</forename> + <forename type="middle">B</forename> + <surname>Cunningham</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">J</forename> + <forename type="middle">J</forename> + <surname>Weis</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">L</forename> + <forename type="middle">R</forename> + <surname>Taveras</surname> + </persName> + </author> + <author> + <persName + xmlns="http://www.tei-c.org/ns/1.0"> + <forename type="first">S</forename> + <surname>Huerta</surname> + </persName> + </author> + <idno type="DOI">10.1007/s10029-019-01898-9</idno> + <idno type="PMID">30701369</idno> + </analytic> + <monogr> + 
<title level="j">Hernia</title> + <imprint> + <biblScope unit="volume">23</biblScope> + <biblScope unit="issue">2</biblScope> + <biblScope unit="page" from="235" to="243" /> + <date type="published" when="2019-01-30" /> + </imprint> + </monogr> +</biblStruct>""" + + d = transform_grobid_ref_xml(citation_xml) + assert d['title'] == "Mesh migration following abdominal hernia repair: a comprehensive review" + assert d['authors'][2]['given_name'] == "L" + assert d['authors'][2]['surname'] == "Taveras" + assert d['authors'][2]['name'] == "L R Taveras" + assert d['doi'] == "10.1007/s10029-019-01898-9" + assert d['pmid'] == "30701369" + assert d['date'] == "2019-01-30" + assert d['pages'] == "235-243" + assert d['volume'] == "23" + assert d['issue'] == "2" + assert d['journal'] == "Hernia" + + +def test_grobid_parse_unstructured(): + """ + NOTE: this test makes live network requests to GROBID + """ + + r = grobid_parse_unstructured("blah") + assert r is None + + r = grobid_parse_unstructured( + """Cunningham HB, Weis JJ, Taveras LR, Huerta S. Mesh migration following abdominal hernia repair: a comprehensive review. Hernia. 2019 Apr;23(2):235-243. doi: 10.1007/s10029-019-01898-9. Epub 2019 Jan 30. 
PMID: 30701369.""" + ) + assert r.title == "Mesh migration following abdominal hernia repair: a comprehensive review" + assert r.contribs[0].surname == "Cunningham" + assert r.contribs[1].surname == "Weis" + assert r.contribs[2].surname == "Taveras" + assert r.contribs[3].surname == "Huerta" + assert r.extra['container_name'] == "Hernia" + assert r.release_year == 2019 + assert r.volume == "23" + assert r.issue == "2" + assert r.pages == "235-243" + assert r.ext_ids.doi == "10.1007/s10029-019-01898-9" + assert r.ext_ids.pmid == "30701369" + + +def test_grobid_parse_unstructured_timeout(): + """ + NOTE: this test makes live network requests to GROBID + """ + with pytest.raises(TimeoutError): + grobid_parse_unstructured("blah", timeout=0.000001) diff --git a/tests/test_simple.py b/tests/test_simple.py new file mode 100644 index 0000000..0c5d216 --- /dev/null +++ b/tests/test_simple.py @@ -0,0 +1,42 @@ +""" +These basically all hit external network services. +""" + +import pytest +import elasticsearch + +from fuzzycat.simple import * +from fuzzycat.config import settings + + +@pytest.fixture +def es_client(): + return elasticsearch.Elasticsearch( + [settings.get("FATCAT_SEARCH_URL", "https://search.fatcat.wiki:443")]) + + +def test_close_fuzzy_unstructured_matches(es_client): + + matches = close_fuzzy_unstructured_matches( + """Cunningham HB, Weis JJ, Taveras LR, Huerta S. Mesh migration following abdominal hernia repair: a comprehensive review. Hernia. 2019 Apr;23(2):235-243. doi: 10.1007/s10029-019-01898-9. Epub 2019 Jan 30. 
PMID: 30701369.""", + es_client=es_client) + + assert matches + assert matches[0].status.name == "EXACT" + assert matches[0].release.ext_ids.doi == "10.1007/s10029-019-01898-9" + + +def test_close_fuzzy_biblio_matches(es_client): + + matches = close_fuzzy_biblio_matches(dict( + title="Mesh migration following abdominal hernia repair: a comprehensive review", + first_author="Cunningham", + year=2019, + journal="Hernia", + ), + es_client=es_client) + + assert matches + # TODO: should be "STRONG" or "WEAK" without all authors? + assert matches[0].status.name in ("STRONG", "WEAK", "AMBIGUOUS") + assert matches[0].release.ext_ids.doi == "10.1007/s10029-019-01898-9" |