aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/ia.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/ia.py')
-rw-r--r-- python/sandcrawler/ia.py 22
1 files changed, 12 insertions, 10 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index a2ca346..ca1182f 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -3,29 +3,31 @@
# in `wayback` library. Means we can't run pylint.
# pylint: skip-file
+import datetime
+import gzip
+import http.client
+import json
import os
import sys
import time
-import gzip
-import json
-import requests
-import datetime
import urllib.parse
-import urllib3.exceptions
-from typing import Tuple
from collections import namedtuple
+from typing import Tuple
-import http.client
+import requests
+import urllib3.exceptions
# not sure this will really work. Should go before wayback imports.
http.client._MAXHEADERS = 1000 # type: ignore
-import wayback.exception
from http.client import IncompleteRead
-from wayback.resourcestore import ResourceStore
+
+import wayback.exception
from gwb.loader import CDXLoaderFactory3
+from wayback.resourcestore import ResourceStore
+
+from .misc import b32_hex, clean_url, gen_file_metadata, requests_retry_session
-from .misc import b32_hex, requests_retry_session, gen_file_metadata, clean_url
class SandcrawlerBackoffError(Exception):
"""