about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ia.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-10-19 16:30:20 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-10-19 16:30:20 -0700
commit: e5c7645010ed1315a43f9cc0cd20ca192b5e8008 (patch)
tree: cf50c444d70010154c97db5714f29d0db4b7657a /python/sandcrawler/ia.py
parent: b388be5aff1b074b82a5382c5267a8ab4c9e615b (diff)
download: sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.tar.gz
sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.zip
SPN: more verbose status logging
Diffstat (limited to 'python/sandcrawler/ia.py')
-rw-r--r-- python/sandcrawler/ia.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 30ebc77..2d0d068 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -4,6 +4,7 @@
# pylint: skip-file
import os, sys, time
+import json
import requests
import datetime
from collections import namedtuple
@@ -861,6 +862,7 @@ class SavePageNowClient:
# if there was a recent crawl of same URL, fetch the status of that
# crawl to get correct datetime
if final_json.get('original_job_id'):
+ print(f" SPN recent capture: {job_id} -> {final_json['original_job_id']}", file=sys.stderr)
resp = self.v2_session.get("{}/status/{}".format(self.v2endpoint, final_json['original_job_id']))
try:
resp.raise_for_status()
@@ -871,6 +873,8 @@ class SavePageNowClient:
#print(final_json, file=sys.stderr)
if final_json['status'] == "success":
+ if final_json.get('original_url').startswith('/'):
+ print(f" truncateded URL in JSON: {request_url} {json.dumps(final_json)}", file=sys.stderr)
return SavePageNowResult(
True,
"success",