diff options
Diffstat (limited to 'mapreduce')
| -rwxr-xr-x | mapreduce/extraction_cdx_grobid.py | 3 | 
1 file changed, 2 insertions, 1 deletion
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py
index 6690f49..e23950c 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/mapreduce/extraction_cdx_grobid.py
@@ -40,7 +40,8 @@ sentry_client = raven.Client()
 
 class MRExtractCdxGrobid(MRJob):
     # CDX lines in; JSON status out
-    HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.NLineInputFormat'
+    #HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.NLineInputFormat'
+    #INPUT_PROTOCOL = mrjob.protocol.RawProtocol
     INPUT_PROTOCOL = mrjob.protocol.RawValueProtocol
     OUTPUT_PROTOCOL = mrjob.protocol.JSONValueProtocol
 
