aboutsummaryrefslogtreecommitdiffstats
path: root/python
ModeNameSize
-rw-r--r--.coveragerc32logstatsplain
-rw-r--r--.gitignore29logstatsplain
-rw-r--r--.pylintrc409logstatsplain
-rw-r--r--Pipfile522logstatsplain
-rw-r--r--Pipfile.lock52174logstatsplain
-rw-r--r--README.md3563logstatsplain
-rw-r--r--TODO52logstatsplain
-rwxr-xr-xbackfill_hbase_from_cdx.py2896logstatsplain
-rw-r--r--common.py2618logstatsplain
-rwxr-xr-xenrich_scored_matches.py938logstatsplain
-rwxr-xr-xextraction_cdx_grobid.py11023logstatsplain
-rwxr-xr-xextraction_ungrobided.py10637logstatsplain
-rwxr-xr-xfilter_grobid_metadata.py4621logstatsplain
-rwxr-xr-xfilter_scored_matches.py3432logstatsplain
-rwxr-xr-xgrobid2json.py5122logstatsplain
-rwxr-xr-ximport_grobid_metadata.py2426logstatsplain
-rwxr-xr-xkafka_grobid.py11273logstatsplain
-rwxr-xr-xkafka_grobid_hbase.py6999logstatsplain
-rwxr-xr-xmanifest_converter.py1594logstatsplain
-rw-r--r--mrjob.conf466logstatsplain
-rw-r--r--pytest.ini171logstatsplain
d---------tests294logstatsplain
l---------title_slug_blacklist.txt -> ../scalding/src/main/resources/slug-denylist.txt48logstatsplain
-rw-r--r--xml2json.py199logstatsplain