From fd2d3f95fcdb10084d8c9e52f3696c3095e9dde7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 3 Nov 2020 15:44:47 -0800 Subject: ingest: add more IA domains --- python/sandcrawler/ingest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index 6d8b162..35b37fc 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -76,8 +76,10 @@ class IngestFileWorker(SandcrawlerWorker): # temporary, until we implement specific fetch and 'petabox' output "://archive.org/", + "://www.archive.org/", "://web.archive.org/web/", "://openlibrary.org/", + "://www.openlibrary.org/", "://fatcat.wiki/", # Domain squats -- cgit v1.2.3