about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-12-03 17:02:27 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2018-12-03 17:02:27 -0800
commit: 255f76dcbe15eaa9f032f26c19a6f28b4690d204 (patch)
tree: a826785660b6468189271a5437b4fc8eaede02b1
parent: 22b8f10bf94cdd4729095b48f3de932fa62cf8a4 (diff)
download: sandcrawler-255f76dcbe15eaa9f032f26c19a6f28b4690d204.tar.gz
          sandcrawler-255f76dcbe15eaa9f032f26c19a6f28b4690d204.zip
more sentry tags when extracting
-rwxr-xr-x  python/kafka_grobid.py  7
1 files changed, 6 insertions, 1 deletions
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index 13fbcff..f3aaedf 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -199,7 +199,12 @@ class KafkaGrobidWorker:
return None, dict(status='denylist', key=key)
# Note: this may not get "cleared" correctly
- sentry_client.extra_context(dict(row_key=key))
+ sentry_client.extra_context(dict(
+ row_key=key,
+ cdx_url=info['file:cdx']['url'],
+ cdx_dt=info['file:cdx']['dt'],
+ cdx_warc=info['file:cdx']['warc'],
+ ))
# Do the extraction
info, status = self.extract(info)