aboutsummaryrefslogtreecommitdiffstats
path: root/extraction/TODO
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-04-04 12:06:38 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-04-04 12:06:38 -0700
commit 1dad0d9e54bfae93eebea47f8a3cb291cdd645c5 (patch)
tree 97a8c9bcaf93734e2dbd8f431d37213520b55fbd /extraction/TODO
parent 427dd875958c8a6d2d791d55f9dda300ebdc853b (diff)
download sandcrawler-1dad0d9e54bfae93eebea47f8a3cb291cdd645c5.tar.gz
sandcrawler-1dad0d9e54bfae93eebea47f8a3cb291cdd645c5.zip
extraction -> mapreduce
Diffstat (limited to 'extraction/TODO')
-rw-r--r-- extraction/TODO 6
1 files changed, 0 insertions, 6 deletions
diff --git a/extraction/TODO b/extraction/TODO
deleted file mode 100644
index 3459752..0000000
--- a/extraction/TODO
+++ /dev/null
@@ -1,6 +0,0 @@
-- better test coverage (actually check coverage!)
-- use pre-mapper command to filter down, eg, by status type?
-- automation/docs for bundling virtualenv along
-- think about speedups
-- abstract CDX line reading and HBase stuff out into a common library
-- actual GROBID_SERVER="http://wbgrp-svc096.us.archive.org:8070"