diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 19:10:35 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 19:10:35 -0700 |
commit | a0e275a4bad46ef41585f0207d6dfa1e3c38bc35 (patch) | |
tree | 92dead8a85e6ff38808beefada8a42693261ceff /python/grobid_tool.py | |
parent | 40adf5ed09d917b8a4b8f75680bbf90c147848b3 (diff) | |
download | sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.tar.gz sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.zip |
remove grobid2json helper file, replace with grobid_tei_xml
Diffstat (limited to 'python/grobid_tool.py')
-rwxr-xr-x | python/grobid_tool.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/python/grobid_tool.py b/python/grobid_tool.py
index f85d243..f99a78b 100755
--- a/python/grobid_tool.py
+++ b/python/grobid_tool.py
@@ -12,7 +12,8 @@ import argparse
 import json
 import sys
 
-from grobid2json import teixml2json
+from grobid_tei_xml import parse_document_xml
+
 from sandcrawler import *
 
 
@@ -75,7 +76,8 @@ def run_transform(args):
         if args.metadata_only:
             out = grobid_client.metadata(line)
         else:
-            out = teixml2json(line["tei_xml"])
+            tei_doc = parse_document_xml(line["tei_xml"])
+            out = tei_doc.to_legacy_dict()
         if out:
             if "source" in line:
                 out["source"] = line["source"]