cat_gateway/service/utilities/
catch_panic.rs

1//! Handle catching panics created by endpoints, logging them and properly responding.
2use std::{any::Any, backtrace::Backtrace, cell::RefCell};
3
4use chrono::prelude::*;
5use panic_message::panic_message;
6use poem::{http::StatusCode, middleware::PanicHandler, IntoResponse};
7use poem_openapi::payload::Json;
8use serde_json::json;
9use tracing::debug;
10
11use crate::{
12    service::{
13        common::responses::code_500_internal_server_error::InternalServerError,
14        utilities::health::{get_live_counter, inc_live_counter, set_not_live},
15    },
16    settings::Settings,
17};
18
/// Customized panic handler for the service.
///
/// Turns a panic caught while handling a request into a `500 Internal Server Error`
/// response instead of crashing the service, and writes the panic details to stdout
/// as a JSON log line.
///
/// Every handled panic increments the liveness counter. Once that counter exceeds the
/// configured threshold the service is flagged as NOT LIVE, which is intended to make
/// Kubernetes restart it.
#[derive(Clone, Debug)]
pub(crate) struct ServicePanicHandler;
26
// Per-thread scratch storage for the customized panic handler.
// The panic hook writes into these slots and the code that handles the caught panic
// takes the values back out, so the details can be included in the panic log entry.
thread_local! {
    /// Formatted source location of the most recent panic on this thread, if any.
    static LOCATION: RefCell<Option<String>> = const { RefCell::new(None) };
    /// Formatted backtrace of the most recent panic on this thread, if any.
    static BACKTRACE: RefCell<Option<String>> = const { RefCell::new(None) };
}
33
34/// Sets a custom panic hook to capture the Backtrace and Panic Location for logging
35/// purposes. This hook gets called BEFORE we catch it.  So the thread local variables
36/// stored here are valid when processing the panic capture.
37pub(crate) fn set_panic_hook() {
38    std::panic::set_hook(Box::new(|panic_info| {
39        // Get the backtrace and format it.
40        let raw_trace = Backtrace::force_capture();
41        let trace = format!("{raw_trace}");
42        BACKTRACE.with(move |b| b.borrow_mut().replace(trace));
43
44        // Get the location and format it.
45        let location = match panic_info.location() {
46            Some(location) => format!("{location}"),
47            None => "Unknown".to_string(),
48        };
49        LOCATION.with(move |l| l.borrow_mut().replace(location));
50    }));
51}
52
53impl PanicHandler for ServicePanicHandler {
54    type Response = poem::Response;
55
56    /// Handle a panic.
57    /// Log the panic and respond with a 500 with appropriate data.
58    fn get_response(&self, err: Box<dyn Any + Send + 'static>) -> Self::Response {
59        // Increment the counter used for liveness checks.
60        inc_live_counter();
61
62        let current_count = get_live_counter();
63        debug!(
64            live_counter = current_count,
65            "Handling service panic response"
66        );
67
68        // If current count is above the threshold, then flag the system as NOT live.
69        if current_count > Settings::service_live_counter_threshold() {
70            set_not_live();
71        }
72
73        let server_err = InternalServerError::new(None);
74
75        // Get the unique identifier for this panic, so we can find it in the logs.
76        let panic_identifier = server_err.id().to_string();
77
78        // Get the message from the panic as best we can.
79        let err_msg = panic_message(&err);
80
81        // This is the location of the panic.
82        let location = match LOCATION.with(|l| l.borrow_mut().take()) {
83            Some(location) => location,
84            None => "Unknown".to_string(),
85        };
86
87        // This is the backtrace of the panic.
88        let backtrace = match BACKTRACE.with(|b| b.borrow_mut().take()) {
89            Some(backtrace) => backtrace,
90            None => "Unknown".to_string(),
91        };
92
93        // For some reason logging doesn't work here.
94        // So manually form a log message and send to stdout.
95        let time = chrono::Utc::now().to_rfc3339_opts(SecondsFormat::Nanos, true);
96
97        let json_log = json!({
98            "backtrace": backtrace,
99            "location": location,
100            "message": err_msg,
101            "id": panic_identifier,
102            "level": "PANIC",
103            "timestamp": time
104        })
105        .to_string();
106
107        println!("{json_log}");
108
109        let mut resp = Json(server_err).into_response();
110        resp.set_status(StatusCode::INTERNAL_SERVER_ERROR);
111        resp
112    }
113}