cat_gateway/service/utilities/
catch_panic.rs

1//! Handle catching panics created by endpoints, logging them and properly responding.
2use std::{any::Any, backtrace::Backtrace, cell::RefCell};
3
4use chrono::prelude::*;
5use panic_message::panic_message;
6use poem::{http::StatusCode, middleware::PanicHandler, IntoResponse};
7use poem_openapi::payload::Json;
8use serde_json::json;
9
10use crate::{
11    service::{
12        common::responses::code_500_internal_server_error::InternalServerError,
13        utilities::health::{get_live_counter, inc_live_counter, set_not_live},
14    },
15    settings::Settings,
16};
17
/// Customized panic handler for the service.
///
/// Turns each panic handed to it into a `500 Internal Server Error` response instead
/// of letting it crash the service, and logs the panic as an error.
/// Every handled panic increments the liveness counter; once that counter exceeds
/// the configured threshold the service is flagged as NOT LIVE, which should cause
/// Kubernetes to restart the service.
#[derive(Clone, Debug)]
pub(crate) struct ServicePanicHandler;
25
// Customized Panic handler - per-thread data storage.
// The panic hook fills these slots so the backtrace and location of a panic can be
// included in the log entry written when that panic is handled.

// Formatted backtrace of the most recent panic on this thread, if any.
thread_local! {
    static BACKTRACE: RefCell<Option<String>> = const { RefCell::new(None) };
}

// Formatted source location of the most recent panic on this thread, if any.
thread_local! {
    static LOCATION: RefCell<Option<String>> = const { RefCell::new(None) };
}
32
33/// Sets a custom panic hook to capture the Backtrace and Panic Location for logging
34/// purposes. This hook gets called BEFORE we catch it.  So the thread local variables
35/// stored here are valid when processing the panic capture.
36pub(crate) fn set_panic_hook() {
37    std::panic::set_hook(Box::new(|panic_info| {
38        // Get the backtrace and format it.
39        let raw_trace = Backtrace::force_capture();
40        let trace = format!("{raw_trace}");
41        BACKTRACE.with(move |b| b.borrow_mut().replace(trace));
42
43        // Get the location and format it.
44        let location = match panic_info.location() {
45            Some(location) => format!("{location}"),
46            None => "Unknown".to_string(),
47        };
48        LOCATION.with(move |l| l.borrow_mut().replace(location));
49    }));
50}
51
52impl PanicHandler for ServicePanicHandler {
53    type Response = poem::Response;
54
55    /// Handle a panic.
56    /// Log the panic and respond with a 500 with appropriate data.
57    fn get_response(&self, err: Box<dyn Any + Send + 'static>) -> Self::Response {
58        // Increment the counter used for liveness checks.
59        inc_live_counter();
60
61        // If current count is above the threshold, then flag the system as NOT live.
62        if get_live_counter() > Settings::service_live_counter_threshold() {
63            set_not_live();
64        }
65
66        let server_err = InternalServerError::new(None);
67
68        // Get the unique identifier for this panic, so we can find it in the logs.
69        let panic_identifier = server_err.id().to_string();
70
71        // Get the message from the panic as best we can.
72        let err_msg = panic_message(&err);
73
74        // This is the location of the panic.
75        let location = match LOCATION.with(|l| l.borrow_mut().take()) {
76            Some(location) => location,
77            None => "Unknown".to_string(),
78        };
79
80        // This is the backtrace of the panic.
81        let backtrace = match BACKTRACE.with(|b| b.borrow_mut().take()) {
82            Some(backtrace) => backtrace,
83            None => "Unknown".to_string(),
84        };
85
86        // For some reason logging doesn't work here.
87        // So manually form a log message and send to stdout.
88        let time = chrono::Utc::now().to_rfc3339_opts(SecondsFormat::Nanos, true);
89
90        let json_log = json!({
91            "backtrace": backtrace,
92            "location": location,
93            "message": err_msg,
94            "id": panic_identifier,
95            "level": "PANIC",
96            "timestamp": time
97        })
98        .to_string();
99
100        println!("{json_log}");
101
102        let mut resp = Json(server_err).into_response();
103        resp.set_status(StatusCode::INTERNAL_SERVER_ERROR);
104        resp
105    }
106}