/python/
../
.coveragerc
.gitignore
.pylintrc
Pipfile
Pipfile.lock
README.md
TODO
backfill_hbase_from_cdx.py
common.py
deliver_dumpgrobid_to_s3.py
deliver_gwb_to_disk.py
deliver_gwb_to_s3.py
enrich_scored_matches.py
extraction_cdx_grobid.py
extraction_ungrobided.py
filter_grobid_metadata.py
filter_groupworks.py
filter_scored_matches.py
grobid2json.py
ia_pdf_match.py
import_grobid_metadata.py
ingest_file.py
kafka_grobid.py
kafka_grobid_hbase.py
manifest_converter.py
mrjob.conf
pytest.ini
sandcrawler
tests
title_slug_blacklist.txt
xml2json.py