diff options
author | Martin Czygan <martin@archive.org> | 2021-11-04 17:05:09 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2021-11-04 17:05:09 +0000 |
commit | 282f315c6ba3643c8c614220ab2f7e1d55de3658 (patch) | |
tree | ccb9e818e18492708d90411cbe2ff7ba8ce0f5ca /tests | |
parent | 615439df4955ca19bf3fdfa10b41b7d8950b3e63 (diff) | |
parent | 2f41335d268b0e2705a1ebff0ff104e965630837 (diff) | |
download | fuzzycat-282f315c6ba3643c8c614220ab2f7e1d55de3658.tar.gz fuzzycat-282f315c6ba3643c8c614220ab2f7e1d55de3658.zip |
Merge branch 'bnewbold-grobid-tei-xml' into 'master'
use grobid_tei_xml for grobid unstructured lookups
See merge request webgroup/fuzzycat!9
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_grobid_unstructured.py | 58 |
1 file changed, 32 insertions, 26 deletions
diff --git a/tests/test_grobid_unstructured.py b/tests/test_grobid_unstructured.py index dd69936..cf71f91 100644 --- a/tests/test_grobid_unstructured.py +++ b/tests/test_grobid_unstructured.py @@ -1,39 +1,43 @@ import pytest -from fuzzycat.grobid_unstructured import grobid_api_process_citation, grobid_parse_unstructured, grobid_ref_to_release, transform_grobid_ref_xml +from grobid_tei_xml import parse_citation_xml +from grobid_tei_xml.types import GrobidBiblio, GrobidAuthor + +from fuzzycat.grobid_unstructured import grobid_api_process_citation, grobid_parse_unstructured, grobid_ref_to_release def test_grobid_ref_to_release(): - d = { - 'title': - "some title", - 'doi': - '10.1234/5678', - 'journal': - 'some journal', - 'authors': [ - { - 'name': 'ahab sailor', - 'given_name': 'ahab', - 'surname': 'sailor' - }, - { - 'name': 'mary jane', - 'given_name': 'mary', - 'surname': 'jane' - }, + d = GrobidBiblio( + title="some title", + doi='10.1234/5678', + journal='some journal', + authors=[ + GrobidAuthor( + full_name='ahab sailor', + given_name='ahab', + surname='sailor', + ), + GrobidAuthor( + full_name='mary jane', + given_name='mary', + surname='jane' + ), ], - } + ) r = grobid_ref_to_release(d) - assert r.title == d['title'] - assert r.ext_ids.doi == d['doi'] - assert r.extra['container_name'] == d['journal'] - assert r.contribs[0].surname == d['authors'][0]['surname'] - assert r.contribs[1].raw_name == d['authors'][1]['name'] + assert r.title == d.title + assert r.ext_ids.doi == d.doi + assert r.extra['container_name'] == d.journal + assert r.contribs[0].surname == d.authors[0].surname + assert r.contribs[1].raw_name == d.authors[1].full_name def test_transform_grobid_ref_xml(): + """ + This used to be a test of the grobid2json file in this repository. 
Now it + is a backwards compatibility test for grobid_tei_xml + """ citation_xml = """ <biblStruct > <analytic> @@ -83,7 +87,9 @@ def test_transform_grobid_ref_xml(): </monogr> </biblStruct>""" - d = transform_grobid_ref_xml(citation_xml) + citation = parse_citation_xml(citation_xml) + assert citation + d = citation.to_legacy_dict() assert d['title'] == "Mesh migration following abdominal hernia repair: a comprehensive review" assert d['authors'][2]['given_name'] == "L" assert d['authors'][2]['surname'] == "Taveras" |