about summary refs log tree commit diff stats
path: root/mapreduce/extraction_cdx_grobid.py
diff options
context:
space:
mode:
Diffstat (limited to 'mapreduce/extraction_cdx_grobid.py')
-rwxr-xr-x mapreduce/extraction_cdx_grobid.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py
index a4a13f8..63f290a 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/mapreduce/extraction_cdx_grobid.py
@@ -61,6 +61,7 @@ class MRExtractCdxGrobid(MRJob):
r = requests.post(self.options.grobid_uri + "/api/processFulltextDocument",
files={'input': content})
if r.status_code is not 200:
+ # For an invalid input file, GROBID responds with HTTP 400 and a JSON body containing a 'description' key (among others)
# XXX:
return None
return r