diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-02-24 16:35:10 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-02-24 16:35:10 -0800 |
commit | 7f7846b99042897afd5916b9263320c0e2775706 (patch) | |
tree | 2898f1aeab36d62db8b7e8f7f3e61c977ae74744 /python/ingest_tool.py | |
parent | 2394c3349d4fd898c1d88808e06da365d6eaa767 (diff) | |
download | sandcrawler-7f7846b99042897afd5916b9263320c0e2775706.tar.gz sandcrawler-7f7846b99042897afd5916b9263320c0e2775706.zip |
switch from 'raven' to 'sentry-sdk'
Diffstat (limited to 'python/ingest_tool.py')
-rwxr-xr-x | python/ingest_tool.py | 30 |
1 files changed, 18 insertions, 12 deletions
```diff
diff --git a/python/ingest_tool.py b/python/ingest_tool.py
index ce3a59c..7866630 100755
--- a/python/ingest_tool.py
+++ b/python/ingest_tool.py
@@ -2,10 +2,11 @@
 
 import argparse
 import json
+import subprocess
 import sys
 from http.server import HTTPServer
 
-import raven
+import sentry_sdk
 from sandcrawler import GrobidClient, JsonLinePusher, KafkaCompressSink, KafkaSink
 from sandcrawler.ingest_file import IngestFileRequestHandler, IngestFileWorker
 
@@ -43,12 +44,6 @@ def run_single_ingest(args):
 
 
 def run_requests(args):
-    if args.enable_sentry:
-        try:
-            git_sha = raven.fetch_git_sha("..")
-        except Exception:
-            git_sha = None
-        sentry_client = raven.Client(release=git_sha)  # noqa:
     # TODO: switch to using JsonLinePusher
     file_worker = IngestFileWorker(
         try_spn2=not args.no_spn2,
@@ -129,6 +124,11 @@ def run_api(args):
 
 def main():
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "--enable-sentry",
+        action="store_true",
+        help="report exceptions to Sentry",
+    )
     subparsers = parser.add_subparsers()
 
     sub_single = subparsers.add_parser("single", help="ingests a single base URL")
@@ -163,11 +163,6 @@ def main():
         "--no-spn2", action="store_true", help="don't use live web (SPNv2)"
     )
     sub_requests.add_argument(
-        "--enable-sentry",
-        action="store_true",
-        help="report exceptions to Sentry",
-    )
-    sub_requests.add_argument(
         "--html-quick-mode",
         action="store_true",
         help="don't fetch individual sub-resources, just use CDX",
@@ -214,6 +209,17 @@ def main():
         parser.print_help(file=sys.stderr)
         sys.exit(-1)
 
+    # configure sentry *after* parsing args
+    if args.enable_sentry:
+        try:
+            GIT_REVISION = (
+                subprocess.check_output(["git", "describe", "--always"]).strip().decode("utf-8")
+            )
+        except Exception:
+            print("failed to configure git revision", file=sys.stderr)
+            GIT_REVISION = None
+        sentry_sdk.Client(release=GIT_REVISION, environment=args.env, max_breadcrumbs=10)
+
     args.func(args)
 
 
```