From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 3B284FCA17B for ; Mon, 9 Mar 2026 19:40:46 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7E0D710E5BE; Mon, 9 Mar 2026 19:40:45 +0000 (UTC) Authentication-Results: gabe.freedesktop.org; dkim=pass (2048-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.b="rd3ms1Qk"; dkim-atps=neutral Received: from tor.source.kernel.org (tor.source.kernel.org [172.105.4.254]) by gabe.freedesktop.org (Postfix) with ESMTPS id 0252110E5BE; Mon, 9 Mar 2026 19:40:43 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by tor.source.kernel.org (Postfix) with ESMTP id 40523600AE; Mon, 9 Mar 2026 19:40:43 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9822BC4CEF7; Mon, 9 Mar 2026 19:40:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1773085242; bh=ykEDyFWqy0dWXj89fHrjAyafwdENidwWPunnvZCcBsE=; h=Date:To:From:Subject:Cc:References:In-Reply-To:From; b=rd3ms1QkWISKBK/kda8ZvCMUc8dUM6BDjqWnDSwTxnHEVTaJxsnj62YNmUcrNsZUq rHhBNMVYVbSmOttFJlipiFCze2aX20rShfa5uF+O9peJ1GHMIKJsLMHuKfTPXRk4Eo 3l4lrQ0ld6HuU8dspVmvmzIGmm14LjkHh5gdY8srnZzNwA+MUXLAlBgTv+IjKJLRo0 8efqtIGgwtJCi2MmWvchNg7hYj1Xao8rHPKiawOJw4WOtKMMptErwMy2V5xq9nfzk6 Taf1FbV6N2ZnYNldwgYDZaIcdg6GduReD+VDwF+cF16DdBWOT/+q8FHnmM5VfpZvMQ qvNg5IvGuvBdg== Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=UTF-8 Date: Mon, 09 Mar 2026 20:40:36 +0100 Message-Id: To: "Tim Kovalenko via B4 Relay" From: "Danilo Krummrich" Subject: Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation Cc: , "Alexandre Courbot" , "Alice Ryhl" , "David Airlie" , "Simona Vetter" , "Miguel Ojeda" , "Gary Guo" , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , "Benno Lossin" , "Andreas Hindborg" , "Trevor Gross" , "Boqun Feng" , "Nathan Chancellor" , "Nicolas Schier" , "Abdiel Janulgue" , "Daniel Almeida" , "Robin Murphy" , , , , , , References: <20260309-drm-rust-next-v4-0-4ef485b19a4c@proton.me> <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> In-Reply-To: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote: > From: Tim Kovalenko > > The `Cmdq::new` function was allocating a `PteArray` struct on the stack > and was causing a stack overflow with 8216 bytes. > > Modify the `PteArray` to calculate and write the Page Table Entries > directly into the coherent DMA buffer one-by-one. This reduces the stack > usage quite a lot. > Reported-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/to= pic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549 Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp") > Signed-off-by: Tim Kovalenko A few nits below, but I can fix them up [1] on apply. > + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).= enumerate() { > + let pte_value =3D start_addr > + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) > + .ok_or(EOVERFLOW)?; This should use PteArray::entry(). It would also be nice to get rid of the unsafe {} and use dma_write!() inst= ead, but this can be a follow-up patch. > + > + chunk.copy_from_slice(&pte_value.to_ne_bytes()); > + } > + > Ok(obj) > } > } > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gs= p/cmdq.rs > index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee990= 38ad10a16e1e32d 100644 > --- a/drivers/gpu/nova-core/gsp/cmdq.rs > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs > @@ -202,7 +202,20 @@ fn new(dev: &device::Device) -> Resul= t { > =20 > let gsp_mem =3D > CoherentAllocation::::alloc_coherent(dev, 1, GFP_KER= NEL | __GFP_ZERO)?; > - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle(= ))?); > + > + const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); We can avoid duplicating this by making it a constant of GspMem. > + let start =3D gsp_mem.dma_handle(); > + // One by one GSP Page write to the memory to avoid stack overfl= ow when allocating > + // the whole array at once. > + for i in 0..NUM_PTES { > + dma_write!( > + gsp_mem, > + [0]?.ptes.0[i], > + PteArray::::entry(start, i)? > + ); > + } -- [1] -- diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 20170e483e04..b5ea14b7dad7 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -48,6 +48,7 @@ unsafe impl AsBytes for PteArra= y {} =20 impl PteArray { /// Returns the page table entry for `index`, for a mapping starting a= t `start` DmaAddress. + // TODO: Replace with `IoView` projection once available. fn entry(start: DmaAddress, index: usize) -> Result { start .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) @@ -90,12 +91,9 @@ fn new(dev: &device::Device) -> Result { .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 - // This is a one by one GSP Page write to the memory - // to avoid stack overflow when allocating the whole array at once= . + // Write values one by one to avoid an on-stack instance of `PteAr= ray`. for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { - let pte_value =3D start_addr - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; + let pte_value =3D PteArray::::entry(start_addr, i)?= ; =20 chunk.copy_from_slice(&pte_value.to_ne_bytes()); } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 9107a1473797..aa42d180f0d5 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -159,7 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them= . It also contains the /// write and read pointers that the CPU updates. /// @@ -172,6 +172,10 @@ struct GspMem { gspq: Msgq, } =20 +impl GspMem { + const PTE_ARRAY_SIZE: usize =3D GSP_PAGE_SIZE / size_of::(); +} + // SAFETY: These structs don't meet the no-padding requirements of AsBytes= but // that is not a problem because they are not used outside the kernel. unsafe impl AsBytes for GspMem {} @@ -202,16 +206,14 @@ fn new(dev: &device::Device) -> Result= { let gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; =20 - const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); - let start =3D gsp_mem.dma_handle(); // One by one GSP Page write to the memory to avoid stack overflow= when allocating // the whole array at once. - for i in 0..NUM_PTES { + for i in 0..GspMem::PTE_ARRAY_SIZE { dma_write!( gsp_mem, [0]?.ptes.0[i], - PteArray::::entry(start, i)? + PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)? ); } =20