diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-22 13:45:47 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-22 13:45:47 -0700 |
commit | ff673bc6be7098efb5a6297d990955761bffc7e6 (patch) | |
tree | 91f36e66bc98c002eb18053b89c2f917523bf4e8 /tests/test_parse.py | |
parent | 8cd413e2ad07bae6bf3ae940d7c4b94b4be274fa (diff) | |
download | grobid_tei_xml-ff673bc6be7098efb5a6297d990955761bffc7e6.tar.gz grobid_tei_xml-ff673bc6be7098efb5a6297d990955761bffc7e6.zip |
to_legacy_dict() helper, and start adding some new fields
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- | tests/test_parse.py | 15 |
1 file changed, 2 insertions, 13 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 30b2926..825b561 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -75,25 +75,14 @@ def test_small_xml() -> None:
     assert doc == expected
 
 
-def test_small_xml_json() -> None:
+def test_small_xml_legacy() -> None:
 
     with open('tests/files/small.xml', 'r') as f:
         tei_xml = f.read()
     with open('tests/files/small.json', 'r') as f:
         json_form = json.loads(f.read())
 
-    d = parse_document_xml(tei_xml).to_dict()
-
-    # munge back to the old JSON format
-    d.update(d.pop('header'))
-    addr = d['authors'][0]['affiliation']['address']
-    addr['postCode'] = addr.pop('post_code')
-
-    # remove nulls from old JSON
-    for c in json_form['citations']:
-        for k in list(c.keys()):
-            if c[k] is None:
-                c.pop(k)
+    d = parse_document_xml(tei_xml).to_legacy_dict()
 
     assert d == json_form
 