diff options
Diffstat (limited to 'mapreduce/extraction_cdx_grobid.py')
-rwxr-xr-x | mapreduce/extraction_cdx_grobid.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py
index 6690f49..e23950c 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/mapreduce/extraction_cdx_grobid.py
@@ -40,7 +40,8 @@ sentry_client = raven.Client()
 class MRExtractCdxGrobid(MRJob):
 
     # CDX lines in; JSON status out
-    HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.NLineInputFormat'
+    #HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.NLineInputFormat'
+    #INPUT_PROTOCOL = mrjob.protocol.RawProtocol
     INPUT_PROTOCOL = mrjob.protocol.RawValueProtocol
     OUTPUT_PROTOCOL = mrjob.protocol.JSONValueProtocol
 