aboutsummaryrefslogtreecommitdiffstats
path: root/extraction/TODO
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-04-04 11:47:41 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-04-04 11:47:41 -0700
commit e8eb959fbdd5d13cd53421ddf2487811d049c4e8 (patch)
tree fbc8c052aac7d4eeb83da0a2d181fb585d2e4a8b /extraction/TODO
parent 7056c83d4a6bc107155eedb1b39f38dc6d290a39 (diff)
download sandcrawler-e8eb959fbdd5d13cd53421ddf2487811d049c4e8.tar.gz
sandcrawler-e8eb959fbdd5d13cd53421ddf2487811d049c4e8.zip
more WIP on extractor
Diffstat (limited to 'extraction/TODO')
-rw-r--r-- extraction/TODO 2
1 files changed, 2 insertions, 0 deletions
diff --git a/extraction/TODO b/extraction/TODO
new file mode 100644
index 0000000..ed10834
--- /dev/null
+++ b/extraction/TODO
@@ -0,0 +1,2 @@
+- abstract CDX line reading and HBase stuff out into a common library
+- actual GROBID_SERVER="http://wbgrp-svc096.us.archive.org:8070"