diff options
author | bnewbold <bnewbold@archive.org> | 2020-02-14 17:55:39 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2020-02-14 17:55:39 +0000 |
commit | c6d7107dc6d8e0925d8a314a64692751725faf78 (patch) | |
tree | 533f45d7b70eceb5d5d19d37e6a71adf0d5f124b /python/fatcat_tools/harvest/doi_registrars.py | |
parent | 07fabec32aada55a75c064e5c1e01a46da30d854 (diff) | |
parent | a1bbf612cef473af0410f9985d9e191a4000a0f5 (diff) | |
download | fatcat-c6d7107dc6d8e0925d8a314a64692751725faf78.tar.gz fatcat-c6d7107dc6d8e0925d8a314a64692751725faf78.zip |
Merge branch 'martin-harvest-print' into 'master'
harvest: log state on startup and use stderr for diagnostics
See merge request webgroup/fatcat!27
Diffstat (limited to 'python/fatcat_tools/harvest/doi_registrars.py')
-rw-r--r-- | python/fatcat_tools/harvest/doi_registrars.py | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index 33f44600..d2d71d3c 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -70,8 +70,8 @@ class HarvestCrossrefWorker: def fail_fast(err, msg): if err is not None: - print("Kafka producer delivery error: {}".format(err)) - print("Bailing out...") + print("Kafka producer delivery error: {}".format(err), file=sys.stderr) + print("Bailing out...", file=sys.stderr) # TODO: should it be sys.exit(-1)? raise KafkaException(err) @@ -117,7 +117,7 @@ class HarvestCrossrefWorker: if http_resp.status_code == 503: # crude backoff; now redundant with session exponential # backoff, but allows for longer backoff/downtime on remote end - print("got HTTP {}, pausing for 30 seconds".format(http_resp.status_code)) + print("got HTTP {}, pausing for 30 seconds".format(http_resp.status_code), file=sys.stderr) # keep kafka producer connection alive self.producer.poll(0) time.sleep(30.0) @@ -131,7 +131,7 @@ class HarvestCrossrefWorker: items = self.extract_items(resp) count += len(items) print("... got {} ({} of {}), HTTP fetch took {}".format(len(items), count, - self.extract_total(resp), http_resp.elapsed)) + self.extract_total(resp), http_resp.elapsed), file=sys.stderr) #print(json.dumps(resp)) for work in items: self.producer.produce( @@ -156,7 +156,7 @@ class HarvestCrossrefWorker: while True: current = self.state.next(continuous) if current: - print("Fetching DOIs updated on {} (UTC)".format(current)) + print("Fetching DOIs updated on {} (UTC)".format(current), file=sys.stderr) self.fetch_date(current) self.state.complete(current, kafka_topic=self.state_topic, @@ -164,11 +164,11 @@ class HarvestCrossrefWorker: continue if continuous: - print("Sleeping {} seconds...".format(self.loop_sleep)) + print("Sleeping {} seconds...".format(self.loop_sleep), file=sys.stderr) time.sleep(self.loop_sleep) else: break - print("{} DOI ingest caught up".format(self.name)) + print("{} DOI ingest caught up".format(self.name), file=sys.stderr) class HarvestDataciteWorker(HarvestCrossrefWorker): |