aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/sandcrawler/ia.py3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 657bee6..672a0b6 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -367,8 +367,9 @@ class CdxApiClient:
int(r.mimetype == best_mimetype),
int(r.mimetype != "warc/revisit"),
r.datetime[:4] == closest_dt[:4],
- int("/" in r.warc_path),
int(r.datetime),
+ # NOTE: previously we demoted SPN records with this warc_path check ahead of datetime
+ int("/" in r.warc_path),
)
rows = sorted(rows, key=_cdx_sort_key)