diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-08-26 05:15:21 +0000 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-26 05:15:21 +0000 |
commit | a71d556763b4031bfa0e56abc72348d7f1d3d966 (patch) | |
tree | cb7750cfb29197bcb9c6b67afe1fc04ed7d6a12b /please | |
parent | 45cc6f57f8c487f53a2946922acbc3519c0e25ee (diff) | |
download | sandcrawler-a71d556763b4031bfa0e56abc72348d7f1d3d966.tar.gz sandcrawler-a71d556763b4031bfa0e56abc72348d7f1d3d966.zip |
please: save extraction output
Diffstat (limited to 'please')
-rwxr-xr-x | please | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
@@ -64,12 +64,15 @@ def run_extract(args):
             --grobid-uri {grobid_uri} \
             -r hadoop \
             -c mrjob.conf \
+            --output-dir {output} \
+            --no-output \
             --archive venv-current.tar.gz#venv \
             --jobconf mapred.line.input.format.linespermap=8000 \
             --jobconf mapreduce.job.queuename=extraction \
             --jobconf mapred.task.timeout=3600000 \
             {input_cdx}
         """.format(hbase_host=HBASE_HOST, env=args.env,
+            output=output,
             input_cdx=args.input_cdx,
             grobid_uri=GROBID_URI)
     subprocess.call(cmd, shell=True)
@@ -89,6 +92,8 @@ def run_extract_ungrobided(args):
             --grobid-uri {grobid_uri} \
             -r hadoop \
             -c mrjob.conf \
+            --output-dir {output} \
+            --no-output \
             --archive venv-current.tar.gz#venv \
             --jobconf mapred.line.input.format.linespermap=8000 \
             --jobconf mapreduce.job.queuename=extraction \
@@ -96,6 +101,7 @@ def run_extract_ungrobided(args):
             {input_ungrobided}
         """.format(hbase_host=HBASE_HOST, env=args.env,
             input_ungrobided=args.input_ungrobided,
+            output=output,
             grobid_uri=GROBID_URI)
     subprocess.call(cmd, shell=True)
 
```