diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | .gitlab-ci.yml | 4 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rwxr-xr-x | please | 6 | ||||
-rw-r--r-- | python/.coveragerc (renamed from mapreduce/.coveragerc) | 0 | ||||
-rw-r--r-- | python/.pylintrc (renamed from mapreduce/.pylintrc) | 0 | ||||
-rw-r--r-- | python/Pipfile (renamed from mapreduce/Pipfile) | 0 | ||||
-rw-r--r-- | python/Pipfile.lock (renamed from mapreduce/Pipfile.lock) | 0 | ||||
-rw-r--r-- | python/README.md (renamed from mapreduce/README.md) | 0 | ||||
-rw-r--r-- | python/TODO (renamed from mapreduce/TODO) | 0 | ||||
-rwxr-xr-x | python/backfill_hbase_from_cdx.py (renamed from mapreduce/backfill_hbase_from_cdx.py) | 0 | ||||
-rw-r--r-- | python/common.py (renamed from mapreduce/common.py) | 0 | ||||
-rwxr-xr-x | python/extraction_cdx_grobid.py (renamed from mapreduce/extraction_cdx_grobid.py) | 0 | ||||
-rwxr-xr-x | python/grobid2json.py (renamed from mapreduce/grobid2json.py) | 0 | ||||
-rw-r--r-- | python/mrjob.conf (renamed from mapreduce/mrjob.conf) | 0 | ||||
-rw-r--r-- | python/pytest.ini (renamed from mapreduce/pytest.ini) | 0 | ||||
-rw-r--r-- | python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml (renamed from mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml) | 0 | ||||
-rw-r--r-- | python/tests/files/example.cdx (renamed from mapreduce/tests/files/example.cdx) | 0 | ||||
-rw-r--r-- | python/tests/files/small.json (renamed from mapreduce/tests/files/small.json) | 0 | ||||
-rw-r--r-- | python/tests/files/small.xml (renamed from mapreduce/tests/files/small.xml) | 0 | ||||
-rw-r--r-- | python/tests/test_backfill_hbase_from_cdx.py (renamed from mapreduce/tests/test_backfill_hbase_from_cdx.py) | 0 | ||||
-rw-r--r-- | python/tests/test_common.py (renamed from mapreduce/tests/test_common.py) | 0 | ||||
-rw-r--r-- | python/tests/test_extraction_cdx_grobid.py (renamed from mapreduce/tests/test_extraction_cdx_grobid.py) | 0 | ||||
-rw-r--r-- | python/tests/test_grobid2json.py (renamed from mapreduce/tests/test_grobid2json.py) | 0 | ||||
-rw-r--r-- | python/xml2json.py (renamed from mapreduce/xml2json.py) | 0 |
25 files changed, 7 insertions, 7 deletions
@@ -2,7 +2,7 @@ mapreduce-*.tar.gz *,cover htmlcov/ -mapreduce/venv-current.tar.gz +python/venv-current.tar.gz *.test *.o diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 88f0cdd..da405d9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,9 +9,9 @@ before_script: - pip3 install pipenv - pipenv --version -test_python_mapreduce: +test_python: script: - - cd mapreduce + - cd python - pipenv install --dev --deploy - pipenv run pytest --cov - pipenv run pylint --disable bad-continuation,arguments-differ,unidiomatic-typecheck *.py @@ -29,7 +29,7 @@ On macOS: Each directory has it's own environment. Do something like: - cd mapreduce + cd python pipenv install --dev pipenv shell @@ -18,7 +18,7 @@ GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070" def rebuild_python(): print("Rebuilding python venv...") - cmd = """cd mapreduce; + cmd = """cd python; export PIPENV_VENV_IN_PROJECT=1; pipenv install --deploy tar -czf venv-current.tar.gz -C .venv .""" @@ -37,7 +37,7 @@ def run_backfill(args): HDFS_DIR, args.env, datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S")) - cmd = """cd mapreduce; + cmd = """cd python; pipenv run ./backfill_hbase_from_cdx.py \ --hbase-host {hbase_host} \ --hbase-table wbgrp-journal-extract-0-{env} \ @@ -57,7 +57,7 @@ def run_extract(args): HDFS_DIR, args.env, datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S")) - cmd = """cd mapreduce; + cmd = """cd python; pipenv run ./extraction_cdx_grobid.py \ --hbase-host {hbase_host} \ --hbase-table wbgrp-journal-extract-0-{env} \ diff --git a/mapreduce/.coveragerc b/python/.coveragerc index 6235f57..6235f57 100644 --- a/mapreduce/.coveragerc +++ b/python/.coveragerc diff --git a/mapreduce/.pylintrc b/python/.pylintrc index 78e9e7f..78e9e7f 100644 --- a/mapreduce/.pylintrc +++ b/python/.pylintrc diff --git a/mapreduce/Pipfile b/python/Pipfile index 129b23e..129b23e 100644 --- a/mapreduce/Pipfile +++ b/python/Pipfile diff --git a/mapreduce/Pipfile.lock b/python/Pipfile.lock index f21e01b..f21e01b 100644 --- a/mapreduce/Pipfile.lock +++ b/python/Pipfile.lock diff --git a/mapreduce/README.md b/python/README.md index aebc160..aebc160 100644 --- a/mapreduce/README.md +++ b/python/README.md diff --git a/mapreduce/TODO b/python/TODO index 4f4db16..4f4db16 100644 --- a/mapreduce/TODO +++ b/python/TODO diff --git a/mapreduce/backfill_hbase_from_cdx.py b/python/backfill_hbase_from_cdx.py index 6b2ec0b..6b2ec0b 100755 --- a/mapreduce/backfill_hbase_from_cdx.py +++ b/python/backfill_hbase_from_cdx.py diff --git a/mapreduce/common.py b/python/common.py index 6710044..6710044 100644 --- a/mapreduce/common.py +++ b/python/common.py diff --git a/mapreduce/extraction_cdx_grobid.py b/python/extraction_cdx_grobid.py index 040538c..040538c 100755 --- a/mapreduce/extraction_cdx_grobid.py +++ b/python/extraction_cdx_grobid.py diff --git a/mapreduce/grobid2json.py b/python/grobid2json.py index ca460f8..ca460f8 100755 --- a/mapreduce/grobid2json.py +++ b/python/grobid2json.py diff --git a/mapreduce/mrjob.conf b/python/mrjob.conf index 6f36196..6f36196 100644 --- a/mapreduce/mrjob.conf +++ b/python/mrjob.conf diff --git a/mapreduce/pytest.ini b/python/pytest.ini index 0a5e921..0a5e921 100644 --- a/mapreduce/pytest.ini +++ b/python/pytest.ini diff --git a/mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml b/python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml index dbc8be5..dbc8be5 100644 --- a/mapreduce/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml +++ b/python/tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml diff --git a/mapreduce/tests/files/example.cdx b/python/tests/files/example.cdx index 84e3271..84e3271 100644 --- a/mapreduce/tests/files/example.cdx +++ b/python/tests/files/example.cdx diff --git a/mapreduce/tests/files/small.json b/python/tests/files/small.json index 208fb49..208fb49 100644 --- a/mapreduce/tests/files/small.json +++ b/python/tests/files/small.json diff --git a/mapreduce/tests/files/small.xml b/python/tests/files/small.xml index 78b9ba2..78b9ba2 100644 --- a/mapreduce/tests/files/small.xml +++ b/python/tests/files/small.xml diff --git a/mapreduce/tests/test_backfill_hbase_from_cdx.py b/python/tests/test_backfill_hbase_from_cdx.py index 070662b..070662b 100644 --- a/mapreduce/tests/test_backfill_hbase_from_cdx.py +++ b/python/tests/test_backfill_hbase_from_cdx.py diff --git a/mapreduce/tests/test_common.py b/python/tests/test_common.py index 34d50ed..34d50ed 100644 --- a/mapreduce/tests/test_common.py +++ b/python/tests/test_common.py diff --git a/mapreduce/tests/test_extraction_cdx_grobid.py b/python/tests/test_extraction_cdx_grobid.py index 1bf2420..1bf2420 100644 --- a/mapreduce/tests/test_extraction_cdx_grobid.py +++ b/python/tests/test_extraction_cdx_grobid.py diff --git a/mapreduce/tests/test_grobid2json.py b/python/tests/test_grobid2json.py index 8497b10..8497b10 100644 --- a/mapreduce/tests/test_grobid2json.py +++ b/python/tests/test_grobid2json.py diff --git a/mapreduce/xml2json.py b/python/xml2json.py index df5064f..df5064f 100644 --- a/mapreduce/xml2json.py +++ b/python/xml2json.py |