summary | refs | log | tree | commit | diff | stats
path: root/tests/test_refs_transform.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-09-14 00:15:57 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-09-14 00:15:57 -0700
commit: 71ce30e7547871cb6fe02fa4237af735bd6b9c49 (patch)
tree: d3fc00dfc5619fbd942ccb4c166fcbbae6d9c672 /tests/test_refs_transform.py
parent: 419311b5ab69938e3a90a97353038435c50c732c (diff)
download: fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.tar.gz
download: fatcat-scholar-71ce30e7547871cb6fe02fa4237af735bd6b9c49.zip
minimum viable tests for GROBID XML parsing and refs transform
Diffstat (limited to 'tests/test_refs_transform.py')
-rw-r--r-- tests/test_refs_transform.py 33
1 files changed, 33 insertions, 0 deletions
diff --git a/tests/test_refs_transform.py b/tests/test_refs_transform.py
new file mode 100644
index 0000000..c26ee1e
--- /dev/null
+++ b/tests/test_refs_transform.py
@@ -0,0 +1,33 @@
+from fatcat_openapi_client import ReleaseEntity
+
+from fatcat_scholar.grobid2json import teixml2json
+from fatcat_scholar.transform import refs_from_grobid
+
+
+def test_transform_refs_grobid() -> None:
+    """Convert the example GROBID TEI-XML fixture and check one extracted reference."""
+    # Explicit encoding: the default would otherwise follow the platform locale.
+    with open("tests/files/example_grobid.tei.xml", "r", encoding="utf-8") as f:
+        blob = f.read()
+
+    # Placeholder release passed to refs_from_grobid(); all values are dummies.
+    dummy_release = ReleaseEntity(
+        ident="releasedummy22222222222222",
+        work_id="workdummy22222222222222222",
+        release_year=1234,
+        ext_ids={},
+    )
+
+    tei_dict = teixml2json(blob, True)  # NOTE(review): meaning of the positional True is not visible here
+    refs = refs_from_grobid(dummy_release, tei_dict)
+    # Spot-check a single entry (index 12) of the transformed reference list.
+    ref = refs[12].biblio
+    assert ref.contrib_raw_names is not None
+    assert ref.contrib_raw_names[0] == "K Tasa"
+    assert ref.container_name == "Quality Management in Health Care"
+    assert ref.title == "Using patient feedback for quality improvement"
+    assert ref.year == 1996
+    assert ref.pages == "206-225"  # structured value; the raw citation below ends in "206-19"
+    assert ref.volume == "8"
+    raw_citation = "Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."
+    assert ref.unstructured == raw_citation