about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:54:09 -0800
committer Bryan Newbold <bnewbold@archive.org> 2019-12-18 15:00:27 -0800
commit cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26 (patch)
tree 1c5eec4e34afb8c43975850a1d36ff1841bc0fcf /python/sandcrawler/ingest.py
parent 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (diff)
download sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.tar.gz
sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.zip
refactor: use print(..., file=sys.stderr)
Should use logging soon, but this seems more idiomatic in the meanwhile.
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py 8
1 files changed, 4 insertions, 4 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index d3f7043..077469a 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -70,7 +70,7 @@ class IngestFileWorker(SandcrawlerWorker):
# extraction didn't work as expected; fetch whatever SPN2 got
cdx = self.cdx_client.lookup_latest(url, follow_redirects=True)
if not cdx:
- sys.stderr.write("{}\n".format(cdx_list))
+ print("{}".format(cdx_list), file=sys.stderr)
raise SavePageNowError("Failed to find terminal capture from SPNv2")
else:
return self.spn_client.save_url_now_v1(url)
@@ -123,7 +123,7 @@ class IngestFileWorker(SandcrawlerWorker):
response['status'] = 'wayback-error'
response['error_message'] = str(e)
return response
- sys.stderr.write("CDX hit: {}\n".format(cdx_dict))
+ print("CDX hit: {}".format(cdx_dict), file=sys.stderr)
response['cdx'] = cdx_dict
# TODO: populate terminal
@@ -172,7 +172,7 @@ class IngestFileWorker(SandcrawlerWorker):
# do GROBID
response['grobid'] = self.grobid_client.process_fulltext(body)
- #sys.stderr.write("GROBID status: {}\n".format(response['grobid']['status']))
+ #print("GROBID status: {}".format(response['grobid']['status']), file=sys.stderr)
# TODO: optionally publish to Kafka here, but continue on failure (but
# send a sentry exception?)
@@ -185,7 +185,7 @@ class IngestFileWorker(SandcrawlerWorker):
response['grobid'].pop('tei_xml')
# Ok, now what?
- #sys.stderr.write("GOT TO END\n")
+ #print("GOT TO END", file=sys.stderr)
response['status'] = "success"
response['hit'] = True
return response