diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-12-03 17:02:27 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-12-03 17:02:27 -0800 |
commit | 255f76dcbe15eaa9f032f26c19a6f28b4690d204 (patch) | |
tree | a826785660b6468189271a5437b4fc8eaede02b1 /python | |
parent | 22b8f10bf94cdd4729095b48f3de932fa62cf8a4 (diff) | |
download | sandcrawler-255f76dcbe15eaa9f032f26c19a6f28b4690d204.tar.gz sandcrawler-255f76dcbe15eaa9f032f26c19a6f28b4690d204.zip |
more sentry tags when extracting
Diffstat (limited to 'python')
-rwxr-xr-x | python/kafka_grobid.py | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index 13fbcff..f3aaedf 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -199,7 +199,12 @@ class KafkaGrobidWorker:
             return None, dict(status='denylist', key=key)
 
         # Note: this may not get "cleared" correctly
-        sentry_client.extra_context(dict(row_key=key))
+        sentry_client.extra_context(dict(
+            row_key=key,
+            cdx_url=info['file:cdx']['url'],
+            cdx_dt=info['file:cdx']['dt'],
+            cdx_warc=info['file:cdx']['warc'],
+        ))
 
         # Do the extraction
         info, status = self.extract(info)
```