public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
From: Alexandre Courbot <acourbot@nvidia.com>
To: Danilo Krummrich <dakr@kernel.org>,
	Alice Ryhl <aliceryhl@google.com>,
	David Airlie <airlied@gmail.com>, Simona Vetter <simona@ffwll.ch>
Cc: John Hubbard <jhubbard@nvidia.com>,
	Alistair Popple <apopple@nvidia.com>,
	Timur Tabi <ttabi@nvidia.com>,
	Eliot Courtney <ecourtney@nvidia.com>,
	nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
	Alexandre Courbot <acourbot@nvidia.com>
Subject: [PATCH v7 4/4] gpu: nova-core: gsp: run the unload bundle if Gsp::boot() fails
Date: Fri, 29 May 2026 16:33:44 +0900	[thread overview]
Message-ID: <20260529-nova-unload-v7-4-678f39209e00@nvidia.com> (raw)
In-Reply-To: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com>

If `Gsp::boot` fails, the GSP can be left in a state where boot cannot
be attempted again unless it is reset first.

To avoid this, run the unload bundle whenever `boot` fails, in an attempt
to clear the partially-initialized state.

Do this by wrapping the unload bundle into a drop guard that remains armed
until `boot` returns successfully. After that, running the unload bundle
becomes the responsibility of the caller.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
 drivers/gpu/nova-core/gsp/boot.rs      | 67 ++++++++++++++++++++++++++++++++--
 drivers/gpu/nova-core/gsp/hal.rs       | 19 +++++-----
 drivers/gpu/nova-core/gsp/hal/gh100.rs | 15 ++++----
 drivers/gpu/nova-core/gsp/hal/tu102.rs | 31 ++++++++++------
 4 files changed, 101 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index 8d6fcc35b653..1f83f63ceeb0 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -8,7 +8,8 @@
     io::poll::read_poll_timeout,
     pci,
     prelude::*,
-    time::Delta, //
+    time::Delta,
+    types::ScopeGuard, //
 };
 
 use crate::{
@@ -31,6 +32,66 @@
     },
 };
 
+/// Arguments required to call [`Gsp::unload`](super::Gsp::unload).
+///
+/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`].
+pub(super) struct BootUnloadArgs<'a> {
+    gsp: &'a super::Gsp,
+    dev: &'a device::Device<device::Bound>,
+    bar: &'a Bar0,
+    gsp_falcon: &'a Falcon<Gsp>,
+    sec2_falcon: &'a Falcon<Sec2>,
+    unload_bundle: Option<super::UnloadBundle>,
+}
+
+/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a
+/// [`UnloadBundle`](super::UnloadBundle) when dropped.
+///
+/// Used to ensure the `UnloadBundle` is run during failure paths.
+pub(super) struct BootUnloadGuard<'a> {
+    guard: ScopeGuard<BootUnloadArgs<'a>, fn(BootUnloadArgs<'a>)>,
+}
+
+impl<'a> BootUnloadGuard<'a> {
+    /// Wraps `unload_bundle` into a guard that executes it when dropped.
+    pub(super) fn new(
+        gsp: &'a super::Gsp,
+        dev: &'a device::Device<device::Bound>,
+        bar: &'a Bar0,
+        gsp_falcon: &'a Falcon<Gsp>,
+        sec2_falcon: &'a Falcon<Sec2>,
+        unload_bundle: Option<super::UnloadBundle>,
+    ) -> Self {
+        Self {
+            guard: ScopeGuard::new_with_data(
+                BootUnloadArgs {
+                    gsp,
+                    dev,
+                    bar,
+                    gsp_falcon,
+                    sec2_falcon,
+                    unload_bundle,
+                },
+                |args| {
+                    let _ = super::Gsp::unload(
+                        args.gsp,
+                        args.dev,
+                        args.bar,
+                        args.gsp_falcon,
+                        args.sec2_falcon,
+                        args.unload_bundle,
+                    );
+                },
+            ),
+        }
+    }
+
+    /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains.
+    pub(super) fn dismiss(self) -> Option<super::UnloadBundle> {
+        self.guard.dismiss().unload_bundle
+    }
+}
+
 impl super::Gsp {
     /// Attempt to boot the GSP.
     ///
@@ -59,7 +120,7 @@ pub(crate) fn boot(
         let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
 
         // Perform the chipset-specific boot sequence, and retrieve the unload bundle.
-        let unload_bundle = hal.boot(
+        let unload_guard = hal.boot(
             &self,
             dev,
             bar,
@@ -99,7 +160,7 @@ pub(crate) fn boot(
             Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e),
         }
 
-        Ok(unload_bundle)
+        Ok(unload_guard.dismiss())
     }
 
     /// Shut down the GSP and wait until it is offline.
diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs
index 501b852dcb29..88fc3e791114 100644
--- a/drivers/gpu/nova-core/gsp/hal.rs
+++ b/drivers/gpu/nova-core/gsp/hal.rs
@@ -25,6 +25,7 @@
         Chipset, //
     },
     gsp::{
+        boot::BootUnloadGuard,
         Gsp,
         GspFwWprMeta, //
     },
@@ -50,20 +51,20 @@ fn run(
 pub(super) trait GspHal: Send {
     /// Performs the GSP boot process, loading and running the required firmwares as needed.
     ///
-    /// Upon success, returns the [`UnloadBundle`] to be run (if any) in order to properly reset the
-    /// GSP after it has been stopped.
+    /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not
+    /// complete.
     #[allow(clippy::too_many_arguments)]
-    fn boot(
+    fn boot<'a>(
         &self,
-        gsp: &Gsp,
-        dev: &device::Device<device::Bound>,
-        bar: &Bar0,
+        gsp: &'a Gsp,
+        dev: &'a device::Device<device::Bound>,
+        bar: &'a Bar0,
         chipset: Chipset,
         fb_layout: &FbLayout,
         wpr_meta: &Coherent<GspFwWprMeta>,
-        gsp_falcon: &Falcon<GspEngine>,
-        sec2_falcon: &Falcon<Sec2>,
-    ) -> Result<Option<crate::gsp::UnloadBundle>>;
+        gsp_falcon: &'a Falcon<GspEngine>,
+        sec2_falcon: &'a Falcon<Sec2>,
+    ) -> Result<BootUnloadGuard<'a>>;
 
     /// Performs HAL-specific post-GSP boot tasks.
     ///
diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
index 0a8b7f763883..9a4bb22578b3 100644
--- a/drivers/gpu/nova-core/gsp/hal/gh100.rs
+++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
@@ -18,6 +18,7 @@
     fb::FbLayout,
     gpu::Chipset,
     gsp::{
+        boot::BootUnloadGuard,
         hal::GspHal,
         Gsp,
         GspFwWprMeta, //
@@ -31,17 +32,17 @@ impl GspHal for Gh100 {
     ///
     /// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles
     /// the GSP boot internally - no manual GSP reset/boot is needed.
-    fn boot(
+    fn boot<'a>(
         &self,
-        _gsp: &Gsp,
-        _dev: &device::Device<device::Bound>,
-        _bar: &Bar0,
+        _gsp: &'a Gsp,
+        _dev: &'a device::Device<device::Bound>,
+        _bar: &'a Bar0,
         _chipset: Chipset,
         _fb_layout: &FbLayout,
         _wpr_meta: &Coherent<GspFwWprMeta>,
-        _gsp_falcon: &Falcon<GspEngine>,
-        _sec2_falcon: &Falcon<Sec2>,
-    ) -> Result<Option<crate::gsp::UnloadBundle>> {
+        _gsp_falcon: &'a Falcon<GspEngine>,
+        _sec2_falcon: &'a Falcon<Sec2>,
+    ) -> Result<BootUnloadGuard<'a>> {
         Err(ENOTSUPP)
     }
 }
diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs
index c4ab081f25c4..6a27e7e90279 100644
--- a/drivers/gpu/nova-core/gsp/hal/tu102.rs
+++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs
@@ -32,6 +32,7 @@
     },
     gpu::Chipset,
     gsp::{
+        boot::BootUnloadGuard,
         hal::{
             GspHal,
             UnloadBundle, //
@@ -254,21 +255,23 @@ fn run_fwsec_frts(
 struct Tu102;
 
 impl GspHal for Tu102 {
-    fn boot(
+    fn boot<'a>(
         &self,
-        gsp: &Gsp,
-        dev: &device::Device<device::Bound>,
-        bar: &Bar0,
+        gsp: &'a Gsp,
+        dev: &'a device::Device<device::Bound>,
+        bar: &'a Bar0,
         chipset: Chipset,
         fb_layout: &FbLayout,
         wpr_meta: &Coherent<GspFwWprMeta>,
-        gsp_falcon: &Falcon<GspEngine>,
-        sec2_falcon: &Falcon<Sec2>,
-    ) -> Result<Option<crate::gsp::UnloadBundle>> {
+        gsp_falcon: &'a Falcon<GspEngine>,
+        sec2_falcon: &'a Falcon<Sec2>,
+    ) -> Result<BootUnloadGuard<'a>> {
         let bios = Vbios::new(dev, bar)?;
 
-        // Try and prepare the unload bundle. If this fails, the GPU will need to be reset
-        // before the driver can be probed again.
+        // Try and prepare the unload bundle.
+        //
+        // If the unload bundle creation fails, the GPU will need to be reset before the driver can
+        // be probed again.
         let unload_bundle =
             Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, sec2_falcon)
                 .inspect_err(|e| {
@@ -279,8 +282,12 @@ fn boot(
                         "The GPU will need to be reset before the driver can bind again.\n"
                     );
                 })
-                .map(crate::gsp::UnloadBundle)
-                .ok();
+                .ok()
+                .map(crate::gsp::UnloadBundle);
+
+        // Wrap the unload bundle into a drop guard so it is automatically run upon failure.
+        let unload_guard =
+            BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle);
 
         // FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100).
         if !fb_layout.frts.is_empty() {
@@ -311,7 +318,7 @@ fn boot(
         )?
         .run(dev, bar, sec2_falcon, wpr_meta)?;
 
-        Ok(unload_bundle)
+        Ok(unload_guard)
     }
 
     fn post_boot(

-- 
2.54.0


  parent reply	other threads:[~2026-05-29  7:34 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-29  7:33 [PATCH v7 0/4] gpu: nova-core: run unload sequence upon unbinding Alexandre Courbot
2026-05-29  7:33 ` [PATCH v7 1/4] gpu: nova-core: gsp: move chipset-specific parts of the boot process into a HAL Alexandre Courbot
2026-06-04  6:57   ` Claude review: " Claude Code Review Bot
2026-05-29  7:33 ` [PATCH v7 2/4] gpu: nova-core: send UNLOADING_GUEST_DRIVER GSP command upon unloading Alexandre Courbot
2026-06-04  6:57   ` Claude review: " Claude Code Review Bot
2026-05-29  7:33 ` [PATCH v7 3/4] gpu: nova-core: run Booter Unloader and FWSEC-SB upon unbinding Alexandre Courbot
2026-06-04  6:57   ` Claude review: " Claude Code Review Bot
2026-05-29  7:33 ` Alexandre Courbot [this message]
2026-05-30  1:46   ` [PATCH v7 4/4] gpu: nova-core: gsp: run the unload bundle if Gsp::boot() fails Eliot Courtney
2026-06-04  6:57   ` Claude review: " Claude Code Review Bot
2026-05-29 11:15 ` [PATCH v7 0/4] gpu: nova-core: run unload sequence upon unbinding Danilo Krummrich
2026-05-29 13:06   ` Alexandre Courbot
2026-05-30  5:55 ` Alexandre Courbot
2026-06-04  6:57 ` Claude review: " Claude Code Review Bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260529-nova-unload-v7-4-678f39209e00@nvidia.com \
    --to=acourbot@nvidia.com \
    --cc=airlied@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=apopple@nvidia.com \
    --cc=dakr@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=ecourtney@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nova-gpu@lists.linux.dev \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=simona@ffwll.ch \
    --cc=ttabi@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox