SPN: more verbose status logging

author: Bryan Newbold <bnewbold@archive.org> 2020-10-19 16:30:20 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-10-19 16:30:20 -0700
commit: e5c7645010ed1315a43f9cc0cd20ca192b5e8008 (patch)
tree: cf50c444d70010154c97db5714f29d0db4b7657a
parent: b388be5aff1b074b82a5382c5267a8ab4c9e615b (diff)
download: sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.tar.gz sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.zip
1 file changed, 4 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 30ebc77..2d0d068 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -4,6 +4,7 @@
 # pylint: skip-file
 
 import os, sys, time
+import json
 import requests
 import datetime
 from collections import namedtuple
@@ -861,6 +862,7 @@ class SavePageNowClient:
         # if there was a recent crawl of same URL, fetch the status of that
         # crawl to get correct datetime
         if final_json.get('original_job_id'):
+            print(f"  SPN recent capture: {job_id} -> {final_json['original_job_id']}", file=sys.stderr)
             resp = self.v2_session.get("{}/status/{}".format(self.v2endpoint, final_json['original_job_id']))
             try:
                 resp.raise_for_status()
@@ -871,6 +873,8 @@ class SavePageNowClient:
         #print(final_json, file=sys.stderr)
 
         if final_json['status'] == "success":
+            if final_json.get('original_url').startswith('/'):
+                print(f"  truncateded URL in JSON: {request_url} {json.dumps(final_json)}", file=sys.stderr)
             return SavePageNowResult(
                 True,
                 "success",
author	Bryan Newbold <bnewbold@archive.org>	2020-10-19 16:30:20 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2020-10-19 16:30:20 -0700
commit	e5c7645010ed1315a43f9cc0cd20ca192b5e8008 (patch)
tree	cf50c444d70010154c97db5714f29d0db4b7657a
parent	b388be5aff1b074b82a5382c5267a8ab4c9e615b (diff)
download	sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.tar.gz sandcrawler-e5c7645010ed1315a43f9cc0cd20ca192b5e8008.zip