diff options
Diffstat (limited to 'python/ingest_tool.py')
-rwxr-xr-x | python/ingest_tool.py | 30 |
1 files changed, 18 insertions, 12 deletions
diff --git a/python/ingest_tool.py b/python/ingest_tool.py index ce3a59c..7866630 100755 --- a/python/ingest_tool.py +++ b/python/ingest_tool.py @@ -2,10 +2,11 @@ import argparse import json +import subprocess import sys from http.server import HTTPServer -import raven +import sentry_sdk from sandcrawler import GrobidClient, JsonLinePusher, KafkaCompressSink, KafkaSink from sandcrawler.ingest_file import IngestFileRequestHandler, IngestFileWorker @@ -43,12 +44,6 @@ def run_single_ingest(args): def run_requests(args): - if args.enable_sentry: - try: - git_sha = raven.fetch_git_sha("..") - except Exception: - git_sha = None - sentry_client = raven.Client(release=git_sha) # noqa: # TODO: switch to using JsonLinePusher file_worker = IngestFileWorker( try_spn2=not args.no_spn2, @@ -129,6 +124,11 @@ def run_api(args): def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--enable-sentry", + action="store_true", + help="report exceptions to Sentry", + ) subparsers = parser.add_subparsers() sub_single = subparsers.add_parser("single", help="ingests a single base URL") @@ -163,11 +163,6 @@ def main(): "--no-spn2", action="store_true", help="don't use live web (SPNv2)" ) sub_requests.add_argument( - "--enable-sentry", - action="store_true", - help="report exceptions to Sentry", - ) - sub_requests.add_argument( "--html-quick-mode", action="store_true", help="don't fetch individual sub-resources, just use CDX", @@ -214,6 +209,17 @@ def main(): parser.print_help(file=sys.stderr) sys.exit(-1) + # configure sentry *after* parsing args + if args.enable_sentry: + try: + GIT_REVISION = ( + subprocess.check_output(["git", "describe", "--always"]).strip().decode("utf-8") + ) + except Exception: + print("failed to configure git revision", file=sys.stderr) + GIT_REVISION = None + sentry_sdk.Client(release=GIT_REVISION, environment=args.env, max_breadcrumbs=10) + args.func(args) |