aboutsummaryrefslogtreecommitdiffstats
path: root/python_hadoop/mrjob.conf
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-09-25 17:51:07 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-09-25 17:51:07 -0700
commit d7830b4a5aad0a59a588e98798711f0e694d50d6 (patch)
tree 7565cbec74584a146b8ee79bb881fa9f78851f60 /python_hadoop/mrjob.conf
parent 6e24eec4b6d1861eba37a0a05220b257e829ebbb (diff)
download sandcrawler-d7830b4a5aad0a59a588e98798711f0e694d50d6.tar.gz
sandcrawler-d7830b4a5aad0a59a588e98798711f0e694d50d6.zip
refactor old python hadoop code into new directory
Diffstat (limited to 'python_hadoop/mrjob.conf')
-rw-r--r-- python_hadoop/mrjob.conf 16
1 file changed, 16 insertions, 0 deletions
diff --git a/python_hadoop/mrjob.conf b/python_hadoop/mrjob.conf
new file mode 100644
index 0000000..6f36196
--- /dev/null
+++ b/python_hadoop/mrjob.conf
@@ -0,0 +1,16 @@
+runners:
+  local:
+    upload_files:
+      - common.py
+      - grobid2json.py
+    setup:
+      - export PYTHONPATH=$PYTHONPATH:venv/lib/python3.5/site-packages/
+  hadoop:
+    no_output: true
+    upload_files:
+      - common.py
+      - grobid2json.py
+    setup:
+      - export PYTHONPATH=$PYTHONPATH:venv/lib/python3.5/site-packages/
+    cmdenv:
+      SENTRY_DSN: https://6ab6ad080d034280b863f294e07cc5c6:414ebf0b68634f669d2dc00d7c935699@books-sentry.us.archive.org/9