diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-12-18 14:54:09 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-12-18 15:00:27 -0800 |
commit | cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26 (patch) | |
tree | 1c5eec4e34afb8c43975850a1d36ff1841bc0fcf /python/sandcrawler/ingest.py | |
parent | 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (diff) | |
download | sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.tar.gz sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.zip |
refactor: use print(..., file=sys.stderr)
Should use logging soon, but this seems more idiomatic in the meantime.
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- | python/sandcrawler/ingest.py | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index d3f7043..077469a 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -70,7 +70,7 @@ class IngestFileWorker(SandcrawlerWorker): # extraction didn't work as expected; fetch whatever SPN2 got cdx = self.cdx_client.lookup_latest(url, follow_redirects=True) if not cdx: - sys.stderr.write("{}\n".format(cdx_list)) + print("{}".format(cdx_list), file=sys.stderr) raise SavePageNowError("Failed to find terminal capture from SPNv2") else: return self.spn_client.save_url_now_v1(url) @@ -123,7 +123,7 @@ class IngestFileWorker(SandcrawlerWorker): response['status'] = 'wayback-error' response['error_message'] = str(e) return response - sys.stderr.write("CDX hit: {}\n".format(cdx_dict)) + print("CDX hit: {}".format(cdx_dict), file=sys.stderr) response['cdx'] = cdx_dict # TODO: populate terminal @@ -172,7 +172,7 @@ class IngestFileWorker(SandcrawlerWorker): # do GROBID response['grobid'] = self.grobid_client.process_fulltext(body) - #sys.stderr.write("GROBID status: {}\n".format(response['grobid']['status'])) + #print("GROBID status: {}".format(response['grobid']['status']), file=sys.stderr) # TODO: optionally publish to Kafka here, but continue on failure (but # send a sentry exception?) @@ -185,7 +185,7 @@ class IngestFileWorker(SandcrawlerWorker): response['grobid'].pop('tei_xml') # Ok, now what? - #sys.stderr.write("GOT TO END\n") + #print("GOT TO END", file=sys.stderr) response['status'] = "success" response['hit'] = True return response |