From a0e275a4bad46ef41585f0207d6dfa1e3c38bc35 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 27 Oct 2021 19:10:35 -0700
Subject: remove grobid2json helper file, replace with grobid_tei_xml

---
 python/scripts/grobid_affiliations.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'python/scripts/grobid_affiliations.py')

diff --git a/python/scripts/grobid_affiliations.py b/python/scripts/grobid_affiliations.py
index b01e46a..90a0f77 100755
--- a/python/scripts/grobid_affiliations.py
+++ b/python/scripts/grobid_affiliations.py
@@ -12,7 +12,7 @@ Run in bulk like:
 import json
 import sys
 
-from grobid2json import teixml2json
+from grobid_tei_xml import parse_document_xml
 
 
 def parse_hbase(line):
@@ -38,7 +38,9 @@ def run(mode="hbase"):
         else:
             raise NotImplementedError("parse mode: {}".format(mode))
 
-        obj = teixml2json(tei_xml, encumbered=False)
+        tei_doc = parse_document_xml(tei_xml)
+        tei_doc.remove_encumbered()
+        obj = tei_doc.to_legacy_dict()
 
         affiliations = []
         for author in obj["authors"]:
-- 
cgit v1.2.3