diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 18:15:33 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 18:15:33 -0700 |
commit | 4c0e3f9c02692cd2cab0657d7fbcd1861a586076 (patch) | |
tree | d3a06b0425d4e3477ee3381b23407e92e6c46f45 /python/sandcrawler/ingest_fileset.py | |
parent | 41fae4c294e2ba43370b4a4193c0f6107201dbf0 (diff) | |
download | sandcrawler-4c0e3f9c02692cd2cab0657d7fbcd1861a586076.tar.gz sandcrawler-4c0e3f9c02692cd2cab0657d7fbcd1861a586076.zip |
update 'XXX' notes from fileset ingest development
Diffstat (limited to 'python/sandcrawler/ingest_fileset.py')
-rw-r--r-- | python/sandcrawler/ingest_fileset.py | 2 |
1 files changed, 1 insertions, 1 deletions
```diff
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index d88fb46..172e1d7 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -80,7 +80,7 @@ class IngestFilesetWorker(IngestFileWorker):
 
         # check against blocklist
         for block in self.base_url_blocklist:
-            # XXX: hack to not skip archive.org content
+            # NOTE: hack to not skip archive.org content
             if 'archive.org' in block:
                 continue
             if block in next_url:
```