diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-04-06 12:39:49 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-04-06 12:39:49 -0700 |
commit | 114c6b611148d2ff499bcea302eee0eca00df647 (patch) | |
tree | df929050d3aa9484f78e5c1807bc951ce1e85512 /mapreduce/extraction_cdx_grobid.py | |
parent | e68d43e2369eed7ddf288be8c8f2edd0a85974e1 (diff) | |
download | sandcrawler-114c6b611148d2ff499bcea302eee0eca00df647.tar.gz sandcrawler-114c6b611148d2ff499bcea302eee0eca00df647.zip |
small grobid2json test
Diffstat (limited to 'mapreduce/extraction_cdx_grobid.py')
-rwxr-xr-x | mapreduce/extraction_cdx_grobid.py | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/mapreduce/extraction_cdx_grobid.py b/mapreduce/extraction_cdx_grobid.py
index a4a13f8..63f290a 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/mapreduce/extraction_cdx_grobid.py
@@ -61,6 +61,7 @@ class MRExtractCdxGrobid(MRJob):
         r = requests.post(self.options.grobid_uri + "/api/processFulltextDocument",
             files={'input': content})
         if r.status_code is not 200:
+            # if invalid file, get a 400 with JSON body with 'description' key (and others)
             # XXX:
             return None
         return r