From 4c0e3f9c02692cd2cab0657d7fbcd1861a586076 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 26 Oct 2021 18:15:33 -0700
Subject: update 'XXX' notes from fileset ingest development

---
 python/sandcrawler/ingest_fileset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'python/sandcrawler/ingest_fileset.py')

diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index d88fb46..172e1d7 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -80,7 +80,7 @@ class IngestFilesetWorker(IngestFileWorker):
 
         # check against blocklist
         for block in self.base_url_blocklist:
-            # XXX: hack to not skip archive.org content
+            # NOTE: hack to not skip archive.org content
             if 'archive.org' in block:
                 continue
             if block in next_url:
-- 
cgit v1.2.3