aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--.gitlab-ci.yml4
-rw-r--r--README.md2
-rwxr-xr-xplease6
-rw-r--r--python/.coveragerc (renamed from mapreduce/.coveragerc)0
-rw-r--r--python/.pylintrc (renamed from mapreduce/.pylintrc)0
-rw-r--r--python/Pipfile (renamed from mapreduce/Pipfile)0
-rw-r--r--python/Pipfile.lock (renamed from mapreduce/Pipfile.lock)0
-rw-r--r--python/README.md (renamed from mapreduce/README.md)0
-rw-r--r--python/TODO (renamed from mapreduce/TODO)0
-rwxr-xr-xpython/backfill_hbase_from_cdx.py (renamed from mapreduce/backfill_hbase_from_cdx.py)0
-rw-r--r--python/common.py (renamed from mapreduce/common.py)0
-rwxr-xr-xpython/extraction_cdx_grobid.py (renamed from mapreduce/extraction_cdx_grobid.py)0
-rwxr-xr-xpython/grobid2json.py (renamed from mapreduce/grobid2json.py)0
-rw-r--r--python/mrjob.conf (renamed from mapreduce/mrjob.conf)0
-rw-r--r--python/pytest.ini (renamed from mapreduce/pytest.ini)0
-rw-r--r--python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml (renamed from mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml)0
-rw-r--r--python/tests/files/example.cdx (renamed from mapreduce/tests/files/example.cdx)0
-rw-r--r--python/tests/files/small.json (renamed from mapreduce/tests/files/small.json)0
-rw-r--r--python/tests/files/small.xml (renamed from mapreduce/tests/files/small.xml)0
-rw-r--r--python/tests/test_backfill_hbase_from_cdx.py (renamed from mapreduce/tests/test_backfill_hbase_from_cdx.py)0
-rw-r--r--python/tests/test_common.py (renamed from mapreduce/tests/test_common.py)0
-rw-r--r--python/tests/test_extraction_cdx_grobid.py (renamed from mapreduce/tests/test_extraction_cdx_grobid.py)0
-rw-r--r--python/tests/test_grobid2json.py (renamed from mapreduce/tests/test_grobid2json.py)0
-rw-r--r--python/xml2json.py (renamed from mapreduce/xml2json.py)0
25 files changed, 7 insertions, 7 deletions
diff --git a/.gitignore b/.gitignore
index 182246f..5723f96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
mapreduce-*.tar.gz
*,cover
htmlcov/
-mapreduce/venv-current.tar.gz
+python/venv-current.tar.gz
*.test
*.o
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 88f0cdd..da405d9 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,9 +9,9 @@ before_script:
- pip3 install pipenv
- pipenv --version
-test_python_mapreduce:
+test_python:
script:
- - cd mapreduce
+ - cd python
- pipenv install --dev --deploy
- pipenv run pytest --cov
- pipenv run pylint --disable bad-continuation,arguments-differ,unidiomatic-typecheck *.py
diff --git a/README.md b/README.md
index e53e775..b322438 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ On macOS:
Each directory has it's own environment. Do something like:
- cd mapreduce
+ cd python
pipenv install --dev
pipenv shell
diff --git a/please b/please
index 5449cee..2021b0a 100755
--- a/please
+++ b/please
@@ -18,7 +18,7 @@ GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070"
def rebuild_python():
print("Rebuilding python venv...")
- cmd = """cd mapreduce;
+ cmd = """cd python;
export PIPENV_VENV_IN_PROJECT=1;
pipenv install --deploy
tar -czf venv-current.tar.gz -C .venv ."""
@@ -37,7 +37,7 @@ def run_backfill(args):
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
- cmd = """cd mapreduce;
+ cmd = """cd python;
pipenv run ./backfill_hbase_from_cdx.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \
@@ -57,7 +57,7 @@ def run_extract(args):
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
- cmd = """cd mapreduce;
+ cmd = """cd python;
pipenv run ./extraction_cdx_grobid.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \
diff --git a/mapreduce/.coveragerc b/python/.coveragerc
index 6235f57..6235f57 100644
--- a/mapreduce/.coveragerc
+++ b/python/.coveragerc
diff --git a/mapreduce/.pylintrc b/python/.pylintrc
index 78e9e7f..78e9e7f 100644
--- a/mapreduce/.pylintrc
+++ b/python/.pylintrc
diff --git a/mapreduce/Pipfile b/python/Pipfile
index 129b23e..129b23e 100644
--- a/mapreduce/Pipfile
+++ b/python/Pipfile
diff --git a/mapreduce/Pipfile.lock b/python/Pipfile.lock
index f21e01b..f21e01b 100644
--- a/mapreduce/Pipfile.lock
+++ b/python/Pipfile.lock
diff --git a/mapreduce/README.md b/python/README.md
index aebc160..aebc160 100644
--- a/mapreduce/README.md
+++ b/python/README.md
diff --git a/mapreduce/TODO b/python/TODO
index 4f4db16..4f4db16 100644
--- a/mapreduce/TODO
+++ b/python/TODO
diff --git a/mapreduce/backfill_hbase_from_cdx.py b/python/backfill_hbase_from_cdx.py
index 6b2ec0b..6b2ec0b 100755
--- a/mapreduce/backfill_hbase_from_cdx.py
+++ b/python/backfill_hbase_from_cdx.py
diff --git a/mapreduce/common.py b/python/common.py
index 6710044..6710044 100644
--- a/mapreduce/common.py
+++ b/python/common.py
diff --git a/mapreduce/extraction_cdx_grobid.py b/python/extraction_cdx_grobid.py
index 040538c..040538c 100755
--- a/mapreduce/extraction_cdx_grobid.py
+++ b/python/extraction_cdx_grobid.py
diff --git a/mapreduce/grobid2json.py b/python/grobid2json.py
index ca460f8..ca460f8 100755
--- a/mapreduce/grobid2json.py
+++ b/python/grobid2json.py
diff --git a/mapreduce/mrjob.conf b/python/mrjob.conf
index 6f36196..6f36196 100644
--- a/mapreduce/mrjob.conf
+++ b/python/mrjob.conf
diff --git a/mapreduce/pytest.ini b/python/pytest.ini
index 0a5e921..0a5e921 100644
--- a/mapreduce/pytest.ini
+++ b/python/pytest.ini
diff --git a/mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml b/python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml
index dbc8be5..dbc8be5 100644
--- a/mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml
+++ b/python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml
diff --git a/mapreduce/tests/files/example.cdx b/python/tests/files/example.cdx
index 84e3271..84e3271 100644
--- a/mapreduce/tests/files/example.cdx
+++ b/python/tests/files/example.cdx
diff --git a/mapreduce/tests/files/small.json b/python/tests/files/small.json
index 208fb49..208fb49 100644
--- a/mapreduce/tests/files/small.json
+++ b/python/tests/files/small.json
diff --git a/mapreduce/tests/files/small.xml b/python/tests/files/small.xml
index 78b9ba2..78b9ba2 100644
--- a/mapreduce/tests/files/small.xml
+++ b/python/tests/files/small.xml
diff --git a/mapreduce/tests/test_backfill_hbase_from_cdx.py b/python/tests/test_backfill_hbase_from_cdx.py
index 070662b..070662b 100644
--- a/mapreduce/tests/test_backfill_hbase_from_cdx.py
+++ b/python/tests/test_backfill_hbase_from_cdx.py
diff --git a/mapreduce/tests/test_common.py b/python/tests/test_common.py
index 34d50ed..34d50ed 100644
--- a/mapreduce/tests/test_common.py
+++ b/python/tests/test_common.py
diff --git a/mapreduce/tests/test_extraction_cdx_grobid.py b/python/tests/test_extraction_cdx_grobid.py
index 1bf2420..1bf2420 100644
--- a/mapreduce/tests/test_extraction_cdx_grobid.py
+++ b/python/tests/test_extraction_cdx_grobid.py
diff --git a/mapreduce/tests/test_grobid2json.py b/python/tests/test_grobid2json.py
index 8497b10..8497b10 100644
--- a/mapreduce/tests/test_grobid2json.py
+++ b/python/tests/test_grobid2json.py
diff --git a/mapreduce/xml2json.py b/python/xml2json.py
index df5064f..df5064f 100644
--- a/mapreduce/xml2json.py
+++ b/python/xml2json.py