index
:
sandcrawler
bnewbold-args
bnewbold-backfill
bnewbold-persist-grobid-errors
bnewbold-refactor-loggging
master
trawler
[no description]
about
summary
refs
log
tree
commit
diff
stats
log msg
author
committer
range
path:
root
/
python
Mode
Name
Size
-rw-r--r--
.coveragerc
32
log
stats
plain
-rw-r--r--
.gitignore
29
log
stats
plain
-rw-r--r--
.pylintrc
409
log
stats
plain
-rw-r--r--
Pipfile
651
log
stats
plain
-rw-r--r--
Pipfile.lock
62666
log
stats
plain
-rw-r--r--
TODO
52
log
stats
plain
-rw-r--r--
common.py
2618
log
stats
plain
-rwxr-xr-x
deliver_dumpgrobid_to_s3.py
4092
log
stats
plain
-rwxr-xr-x
deliver_gwb_to_disk.py
7109
log
stats
plain
-rwxr-xr-x
deliver_gwb_to_s3.py
7663
log
stats
plain
-rwxr-xr-x
enrich_scored_matches.py
938
log
stats
plain
-rwxr-xr-x
filter_grobid_metadata.py
4621
log
stats
plain
-rwxr-xr-x
filter_groupworks.py
4233
log
stats
plain
-rwxr-xr-x
filter_scored_matches.py
3432
log
stats
plain
-rwxr-xr-x
grobid2json.py
5595
log
stats
plain
-rwxr-xr-x
ia_pdf_match.py
2889
log
stats
plain
-rwxr-xr-x
import_grobid_metadata.py
2426
log
stats
plain
-rwxr-xr-x
ingest_file.py
10487
log
stats
plain
-rwxr-xr-x
kafka_grobid.py
13599
log
stats
plain
-rwxr-xr-x
manifest_converter.py
1594
log
stats
plain
-rw-r--r--
pytest.ini
171
log
stats
plain
d---------
sandcrawler
111
log
stats
plain
d---------
tests
161
log
stats
plain
l---------
title_slug_blacklist.txt
->
../scalding/src/main/resources/slug-denylist.txt
48
log
stats
plain
-rw-r--r--
xml2json.py
199
log
stats
plain