aboutsummaryrefslogtreecommitdiffstats
path: root/python/grobid_tool.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-27 19:10:35 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-27 19:10:35 -0700
commit a0e275a4bad46ef41585f0207d6dfa1e3c38bc35 (patch)
tree 92dead8a85e6ff38808beefada8a42693261ceff /python/grobid_tool.py
parent 40adf5ed09d917b8a4b8f75680bbf90c147848b3 (diff)
download sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.tar.gz
sandcrawler-a0e275a4bad46ef41585f0207d6dfa1e3c38bc35.zip
remove grobid2json helper file, replace with grobid_tei_xml
Diffstat (limited to 'python/grobid_tool.py')
-rwxr-xr-x python/grobid_tool.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/grobid_tool.py b/python/grobid_tool.py
index f85d243..f99a78b 100755
--- a/python/grobid_tool.py
+++ b/python/grobid_tool.py
@@ -12,7 +12,8 @@ import argparse
import json
import sys
-from grobid2json import teixml2json
+from grobid_tei_xml import parse_document_xml
+
from sandcrawler import *
@@ -75,7 +76,8 @@ def run_transform(args):
if args.metadata_only:
out = grobid_client.metadata(line)
else:
- out = teixml2json(line["tei_xml"])
+ tei_doc = parse_document_xml(line["tei_xml"])
+ out = tei_doc.to_legacy_dict()
if out:
if "source" in line:
out["source"] = line["source"]