From be1eeae2a30907752729a8dd161d0beb2afd9e52 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 6 Apr 2018 15:04:47 -0700 Subject: renamed do_tei --- mapreduce/extraction_cdx_grobid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mapreduce') diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py index d1ec840..8fa1720 100755 --- a/mapreduce/extraction_cdx_grobid.py +++ b/mapreduce/extraction_cdx_grobid.py @@ -30,7 +30,7 @@ from wayback.resource import ArcResource from wayback.resourcestore import ResourceStore from gwb.loader import CDXLoaderFactory from common import parse_cdx_line -from grobid2json import do_tei +from grobid2json import teixml2json class MRExtractCdxGrobid(MRJob): @@ -147,8 +147,8 @@ class MRExtractCdxGrobid(MRJob): # Convert TEI XML to JSON # TODO: - info['grobid0:tei_json'] = do_tei(grobid_response.content, encumbered=True) - info['grobid0:metadata'] = do_tei(grobid_response.content, encumbered=False) + info['grobid0:tei_json'] = teixml2json(grobid_response.content, encumbered=True) + info['grobid0:metadata'] = teixml2json(grobid_response.content, encumbered=False) # Determine extraction "quality" # TODO: -- cgit v1.2.3