diff options
-rw-r--r-- | python/sandcrawler/ia.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 657bee6..672a0b6 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -367,8 +367,9 @@ class CdxApiClient:
                 int(r.mimetype == best_mimetype),
                 int(r.mimetype != "warc/revisit"),
                 r.datetime[:4] == closest_dt[:4],
-                int("/" in r.warc_path),
                 int(r.datetime),
+                # NOTE: previously we demoted SPN records with this warc_path check ahead of datetime
+                int("/" in r.warc_path),
             )
 
         rows = sorted(rows, key=_cdx_sort_key)