aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-12-26 21:35:18 -0800
committer Bryan Newbold <bnewbold@archive.org> 2019-12-26 21:35:18 -0800
commit 905c116821dbcf0103323fcf8f0b58d2dfa81ddf (patch)
tree a3c568356004bdf801d9e2c92a99224f3535fb33 /python
parent 3245ab84fab934d495fbe8ca312ccaa9319eba95 (diff)
download sandcrawler-905c116821dbcf0103323fcf8f0b58d2dfa81ddf.tar.gz
sandcrawler-905c116821dbcf0103323fcf8f0b58d2dfa81ddf.zip
update TODO
Diffstat (limited to 'python')
-rw-r--r-- python/TODO 8
1 files changed, 7 insertions, 1 deletions
diff --git a/python/TODO b/python/TODO
index 6b05646..89cec83 100644
--- a/python/TODO
+++ b/python/TODO
@@ -1 +1,7 @@
-- refactor extractor common code into a shared file
+
+ingest crawler:
+- SPNv2 only
+ - remove most SPNv1/v2 path selection
+- landing page + fulltext hops only (short recursion depth)
+- use wayback client library instead of requests to fetch content
+