aboutsummaryrefslogtreecommitdiffstats
path: root/python
ModeNameSize
-rw-r--r--.coveragerc32logstatsplain
-rw-r--r--.gitignore29logstatsplain
-rw-r--r--.pylintrc409logstatsplain
-rw-r--r--Pipfile651logstatsplain
-rw-r--r--Pipfile.lock62666logstatsplain
-rw-r--r--README.md3563logstatsplain
-rw-r--r--TODO52logstatsplain
-rwxr-xr-xbackfill_hbase_from_cdx.py2896logstatsplain
-rw-r--r--common.py2618logstatsplain
-rwxr-xr-xdeliver_dumpgrobid_to_s3.py4092logstatsplain
-rwxr-xr-xdeliver_gwb_to_disk.py7109logstatsplain
-rwxr-xr-xdeliver_gwb_to_s3.py7663logstatsplain
-rwxr-xr-xenrich_scored_matches.py938logstatsplain
-rwxr-xr-xextraction_cdx_grobid.py11769logstatsplain
-rwxr-xr-xextraction_ungrobided.py11383logstatsplain
-rwxr-xr-xfilter_grobid_metadata.py4621logstatsplain
-rwxr-xr-xfilter_groupworks.py4233logstatsplain
-rwxr-xr-xfilter_scored_matches.py3432logstatsplain
-rwxr-xr-xgrobid2json.py5595logstatsplain
-rwxr-xr-xia_pdf_match.py2889logstatsplain
-rwxr-xr-ximport_grobid_metadata.py2426logstatsplain
-rwxr-xr-xkafka_grobid.py13599logstatsplain
-rwxr-xr-xkafka_grobid_hbase.py7413logstatsplain
-rwxr-xr-xmanifest_converter.py1594logstatsplain
-rw-r--r--mrjob.conf466logstatsplain
-rw-r--r--pytest.ini171logstatsplain
d---------sandcrawler111logstatsplain
d---------tests334logstatsplain
l---------title_slug_blacklist.txt -> ../scalding/src/main/resources/slug-denylist.txt48logstatsplain
-rw-r--r--xml2json.py199logstatsplain