about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest_fileset.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:15:33 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 18:15:33 -0700
commit: 4c0e3f9c02692cd2cab0657d7fbcd1861a586076 (patch)
tree: d3a06b0425d4e3477ee3381b23407e92e6c46f45 /python/sandcrawler/ingest_fileset.py
parent: 41fae4c294e2ba43370b4a4193c0f6107201dbf0 (diff)
download: sandcrawler-4c0e3f9c02692cd2cab0657d7fbcd1861a586076.tar.gz
sandcrawler-4c0e3f9c02692cd2cab0657d7fbcd1861a586076.zip
update 'XXX' notes from fileset ingest development
Diffstat (limited to 'python/sandcrawler/ingest_fileset.py')
-rw-r--r-- python/sandcrawler/ingest_fileset.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index d88fb46..172e1d7 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -80,7 +80,7 @@ class IngestFilesetWorker(IngestFileWorker):
# check against blocklist
for block in self.base_url_blocklist:
- # XXX: hack to not skip archive.org content
+ # NOTE: hack to not skip archive.org content
if 'archive.org' in block:
continue
if block in next_url: