aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
author Martin Czygan <martin@archive.org> 2021-11-04 17:05:09 +0000
committer Martin Czygan <martin@archive.org> 2021-11-04 17:05:09 +0000
commit 282f315c6ba3643c8c614220ab2f7e1d55de3658 (patch)
tree ccb9e818e18492708d90411cbe2ff7ba8ce0f5ca /tests
parent 615439df4955ca19bf3fdfa10b41b7d8950b3e63 (diff)
parent 2f41335d268b0e2705a1ebff0ff104e965630837 (diff)
download fuzzycat-282f315c6ba3643c8c614220ab2f7e1d55de3658.tar.gz
fuzzycat-282f315c6ba3643c8c614220ab2f7e1d55de3658.zip
Merge branch 'bnewbold-grobid-tei-xml' into 'master'
use grobid_tei_xml for grobid unstructured lookups
See merge request webgroup/fuzzycat!9
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_grobid_unstructured.py 58
1 files changed, 32 insertions, 26 deletions
diff --git a/tests/test_grobid_unstructured.py b/tests/test_grobid_unstructured.py
index dd69936..cf71f91 100644
--- a/tests/test_grobid_unstructured.py
+++ b/tests/test_grobid_unstructured.py
@@ -1,39 +1,43 @@
import pytest
-from fuzzycat.grobid_unstructured import grobid_api_process_citation, grobid_parse_unstructured, grobid_ref_to_release, transform_grobid_ref_xml
+from grobid_tei_xml import parse_citation_xml
+from grobid_tei_xml.types import GrobidBiblio, GrobidAuthor
+
+from fuzzycat.grobid_unstructured import grobid_api_process_citation, grobid_parse_unstructured, grobid_ref_to_release
def test_grobid_ref_to_release():
- d = {
- 'title':
- "some title",
- 'doi':
- '10.1234/5678',
- 'journal':
- 'some journal',
- 'authors': [
- {
- 'name': 'ahab sailor',
- 'given_name': 'ahab',
- 'surname': 'sailor'
- },
- {
- 'name': 'mary jane',
- 'given_name': 'mary',
- 'surname': 'jane'
- },
+ d = GrobidBiblio(
+ title="some title",
+ doi='10.1234/5678',
+ journal='some journal',
+ authors=[
+ GrobidAuthor(
+ full_name='ahab sailor',
+ given_name='ahab',
+ surname='sailor',
+ ),
+ GrobidAuthor(
+ full_name='mary jane',
+ given_name='mary',
+ surname='jane'
+ ),
],
- }
+ )
r = grobid_ref_to_release(d)
- assert r.title == d['title']
- assert r.ext_ids.doi == d['doi']
- assert r.extra['container_name'] == d['journal']
- assert r.contribs[0].surname == d['authors'][0]['surname']
- assert r.contribs[1].raw_name == d['authors'][1]['name']
+ assert r.title == d.title
+ assert r.ext_ids.doi == d.doi
+ assert r.extra['container_name'] == d.journal
+ assert r.contribs[0].surname == d.authors[0].surname
+ assert r.contribs[1].raw_name == d.authors[1].full_name
def test_transform_grobid_ref_xml():
+ """
+ This used to be a test of the grobid2json file in this repository. Now it
+ is a backwards compatibility test for grobid_tei_xml
+ """
citation_xml = """
<biblStruct >
<analytic>
@@ -83,7 +87,9 @@ def test_transform_grobid_ref_xml():
</monogr>
</biblStruct>"""
- d = transform_grobid_ref_xml(citation_xml)
+ citation = parse_citation_xml(citation_xml)
+ assert citation
+ d = citation.to_legacy_dict()
assert d['title'] == "Mesh migration following abdominal hernia repair: a comprehensive review"
assert d['authors'][2]['given_name'] == "L"
assert d['authors'][2]['surname'] == "Taveras"