From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 49FB2105F7AE for ; Fri, 13 Mar 2026 14:57:21 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7B32B10E027; Fri, 13 Mar 2026 14:57:20 +0000 (UTC) Authentication-Results: gabe.freedesktop.org; dkim=pass (1024-bit key; unprotected) header.d=collabora.com header.i=daniel.almeida@collabora.com header.b="Kt8jVkPf"; dkim-atps=neutral Received: from sender4-pp-f112.zoho.com (sender4-pp-f112.zoho.com [136.143.188.112]) by gabe.freedesktop.org (Postfix) with ESMTPS id 7F3A810E027 for ; Fri, 13 Mar 2026 14:57:19 +0000 (UTC) ARC-Seal: i=1; a=rsa-sha256; t=1773413837; cv=none; d=zohomail.com; s=zohoarc; b=Nby/xzI670tYxlLgthn7pcRoq6hbAjsDb0cpaTetyuZaIuthlpVX7h8JZUcGnKXHJebm4vP58pp/WDtfsz38D+iI827z/gSRYmWXwhVdno3Tb64zcu7JXqgDfyT/CXSmFPUr1c+J9B5+iShKgtHXtDQVRgMR5O5eCc7vifuu/d8= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1773413837; h=Content-Type:Content-Transfer-Encoding:Cc:Cc:Date:Date:From:From:In-Reply-To:MIME-Version:Message-ID:References:Subject:Subject:To:To:Message-Id:Reply-To; bh=Z4WG56oUcuu9VIK21sSTkqgnVdbzzdfg5Q0Ed/8VRBg=; b=Nqv3BpQc/S0jwV9oBeRTJmQh/WfvSisWqye7S4gAw+sTEPw/GK2VW55azUrqzIteZIyMiKh6rdB8Gj3UgSLMXvxW6KZi7nKcyRIAyaVCiAVAb+Q2xv66gcb4AEJBTxmpgqJJhlKJDIYDmHbB53VtkzNE9Gv/23gRZDrkKowiC6Q= ARC-Authentication-Results: i=1; mx.zohomail.com; dkim=pass header.i=collabora.com; spf=pass smtp.mailfrom=daniel.almeida@collabora.com; dmarc=pass header.from= DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; t=1773413837; s=zohomail; d=collabora.com; i=daniel.almeida@collabora.com; h=Content-Type:Mime-Version:Subject:Subject:From:From:In-Reply-To:Date:Date:Cc:Cc:Content-Transfer-Encoding:Message-Id:Message-Id:References:To:To:Reply-To; bh=Z4WG56oUcuu9VIK21sSTkqgnVdbzzdfg5Q0Ed/8VRBg=; b=Kt8jVkPfr3EY18f3fyrIRdHOt/lQ07MvcQGjFFAfZrFT/tLrNeKrfaNWDTcWTNRC LOSRH42qnkMQJ1jMdgQFW7U3sxeZJklYdzyac730FnRD/URl4b2PXVDgPnu/0NNdvzy PFUQ9cwU/5R8Upxp/4bouMCkUrR1Y94pc67106so= Received: by mx.zohomail.com with SMTPS id 1773413833949805.4851047988343; Fri, 13 Mar 2026 07:57:13 -0700 (PDT) Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 16.0 \(3826.700.81\)) Subject: Re: [PATCH v1 RESEND 4/4] drm/tyr: add GPU reset handling From: Daniel Almeida In-Reply-To: <20260313091646.16938-5-work@onurozkan.dev> Date: Fri, 13 Mar 2026 11:56:58 -0300 Cc: linux-kernel@vger.kernel.org, dakr@kernel.org, aliceryhl@google.com, airlied@gmail.com, simona@ffwll.ch, dri-devel@lists.freedesktop.org, rust-for-linux@vger.kernel.org, Deborah Brouwer Content-Transfer-Encoding: quoted-printable Message-Id: <1E3CDC4F-A17E-4B31-89F7-FA1B96248CCD@collabora.com> References: <20260313091646.16938-1-work@onurozkan.dev> <20260313091646.16938-5-work@onurozkan.dev> To: =?utf-8?Q?Onur_=C3=96zkan?= X-Mailer: Apple Mail (2.3826.700.81) X-ZohoMailClient: External X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" > On 13 Mar 2026, at 06:16, Onur =C3=96zkan wrote: >=20 > Move Tyr reset logic into a new reset module and add async reset work. >=20 > This adds: > - ResetHandle with internal controller state > - a dedicated ordered reset workqueue > - a pending flag to avoid duplicate queued resets > - run_reset() as the shared synchronous reset helper >=20 > Probe now calls reset::run_reset() before normal init. Driver data now > keeps ResetHandle so reset work is drained before clocks and = regulators > are dropped. >=20 > Tested-by: Deborah Brouwer > Signed-off-by: Onur =C3=96zkan > --- > drivers/gpu/drm/tyr/driver.rs | 40 +++----- > drivers/gpu/drm/tyr/reset.rs | 180 ++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/tyr/tyr.rs | 1 + > 3 files changed, 192 insertions(+), 29 deletions(-) > create mode 100644 drivers/gpu/drm/tyr/reset.rs >=20 > diff --git a/drivers/gpu/drm/tyr/driver.rs = b/drivers/gpu/drm/tyr/driver.rs > index f7951804e4e0..c80238a21ff2 100644 > --- a/drivers/gpu/drm/tyr/driver.rs > +++ b/drivers/gpu/drm/tyr/driver.rs > @@ -6,11 +6,8 @@ > OptionalClk, // > }, > device::{ > - Bound, > - Core, > - Device, // > + Core, // > }, > - devres::Devres, > dma::{ > Device as DmaDevice, > DmaMask, // > @@ -22,10 +19,7 @@ > Registered, > UnregisteredDevice, // > }, > - io::poll, > - new_mutex, > - of, > - platform, > + new_mutex, of, platform, > prelude::*, > regulator, > regulator::Regulator, > @@ -35,17 +29,15 @@ > Arc, > Mutex, // > }, > - time, // > }; >=20 > use crate::{ > file::TyrDrmFileData, > fw::Firmware, > gem::BoData, > - gpu, > gpu::GpuInfo, > mmu::Mmu, > - regs, // > + reset, // > }; >=20 > pub(crate) type IoMem =3D kernel::io::mem::IoMem; > @@ -62,6 +54,11 @@ pub(crate) struct TyrPlatformDriverData { >=20 > #[pin_data] > pub(crate) struct TyrDrmDeviceData { > + // `ResetHandle::drop()` drains queued/running works and this = must happen > + // before clocks/regulators are dropped. So keep this field = before them to > + // ensure the correct drop order. > + pub(crate) reset: reset::ResetHandle, > + > pub(crate) pdev: ARef, >=20 > pub(crate) fw: Arc, > @@ -90,22 +87,6 @@ unsafe impl Send for TyrDrmDeviceData {} > // SAFETY: This will be removed in a future patch. > unsafe impl Sync for TyrDrmDeviceData {} >=20 > -fn issue_soft_reset(dev: &Device, iomem: &Devres) -> = Result { > - // Clear any stale reset-complete IRQ state before issuing a new = soft reset. > - regs::GPU_IRQ_CLEAR.write(dev, iomem, = regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?; > - regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; > - > - poll::read_poll_timeout( > - || regs::GPU_IRQ_RAWSTAT.read(dev, iomem), > - |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED !=3D = 0, > - time::Delta::from_millis(1), > - time::Delta::from_millis(100), > - ) > - .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?; > - > - Ok(()) > -} > - > kernel::of_device_table!( > OF_TABLE, > MODULE_OF_TABLE, > @@ -138,8 +119,7 @@ fn probe( > let request =3D pdev.io_request_by_index(0).ok_or(ENODEV)?; > let iomem =3D Arc::pin_init(request.iomap_sized::(), = GFP_KERNEL)?; >=20 > - issue_soft_reset(pdev.as_ref(), &iomem)?; > - gpu::l2_power_on(pdev.as_ref(), &iomem)?; > + reset::run_reset(pdev.as_ref(), &iomem)?; >=20 > let gpu_info =3D GpuInfo::new(pdev.as_ref(), &iomem)?; > gpu_info.log(pdev); > @@ -153,6 +133,7 @@ fn probe( >=20 > let uninit_ddev =3D = UnregisteredDevice::::new(pdev.as_ref())?; > let platform: ARef =3D pdev.into(); > + let reset =3D reset::ResetHandle::new(platform.clone(), = iomem.clone())?; >=20 > let mmu =3D Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?; >=20 > @@ -178,6 +159,7 @@ fn probe( > _mali: mali_regulator, > _sram: sram_regulator, > }), > + reset, > gpu_info, > }); > let ddev =3D Registration::new_foreign_owned(uninit_ddev, = pdev.as_ref(), data, 0)?; > diff --git a/drivers/gpu/drm/tyr/reset.rs = b/drivers/gpu/drm/tyr/reset.rs > new file mode 100644 > index 000000000000..29dfae98b0dd > --- /dev/null > +++ b/drivers/gpu/drm/tyr/reset.rs > @@ -0,0 +1,180 @@ > +// SPDX-License-Identifier: GPL-2.0 or MIT > + > +//! Provides asynchronous reset handling for the Tyr DRM driver via > +//! [`ResetHandle`], which runs reset work on a dedicated ordered > +//! workqueue and avoids duplicate pending resets. > + > +use kernel::{ > + device::{ > + Bound, > + Device, // > + }, > + devres::Devres, > + io::poll, > + platform, > + prelude::*, > + sync::{ > + aref::ARef, > + atomic::{ > + Acquire, > + Atomic, > + Relaxed, > + Release, // > + }, > + Arc, > + }, > + time, > + workqueue::{ > + self, > + Work, // > + }, > +}; > + > +use crate::{ > + driver::IoMem, > + gpu, > + regs, // > +}; > + > +/// Manages asynchronous GPU reset handling and ensures only a single = reset > +/// work is pending at a time. > +#[pin_data] > +struct Controller { > + /// Platform device reference needed for reset operations and = logging. > + pdev: ARef, > + /// Mapped register space needed for reset operations. > + iomem: Arc>, > + /// Atomic flag for controlling the scheduling pending state. > + pending: Atomic, > + /// Dedicated ordered workqueue for reset operations. > + wq: workqueue::OrderedQueue, > + /// Work item backing async reset processing. > + #[pin] > + work: Work, > +} > + > +kernel::impl_has_work! { > + impl HasWork for Controller { self.work } > +} > + > +impl workqueue::WorkItem for Controller { > + type Pointer =3D Arc; > + > + fn run(this: Arc) { > + this.reset_work(); > + } > +} > + > +impl Controller { > + /// Creates a [`Controller`] instance. > + fn new(pdev: ARef, iomem: Arc>) = -> Result> { > + let wq =3D workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?; > + > + Arc::pin_init( > + try_pin_init!(Self { > + pdev, > + iomem, > + pending: Atomic::new(false), > + wq, > + work <- kernel::new_work!("tyr::reset"), > + }), > + GFP_KERNEL, > + ) > + } > + > + /// Processes one scheduled reset request. > + /// > + /// Panthor reference: > + /// - = drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work() > + fn reset_work(self: &Arc) { > + dev_info!(self.pdev.as_ref(), "GPU reset work is = started.\n"); > + > + // SAFETY: `Controller` is part of driver-private data and = only exists > + // while the platform device is bound. > + let pdev =3D unsafe { self.pdev.as_ref().as_bound() }; > + if let Err(e) =3D run_reset(pdev, &self.iomem) { > + dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", = e); > + } else { > + dev_info!(self.pdev.as_ref(), "GPU reset work is = done.\n"); > + } Can we have more descriptive strings here? A user cares little for implementation details such as =E2=80=9Creset work=E2=80=9D, what they = care about is that the hardware is undergoing a reset. > + > + self.pending.store(false, Release); > + } > +} > + > +/// Reset handle that shuts down pending work gracefully on drop. > +pub(crate) struct ResetHandle(Arc); > + Why is this an Arc? There seems to be a single owner? > +impl ResetHandle { > + /// Creates a [`ResetHandle`] instance. > + pub(crate) fn new(pdev: ARef, iomem: = Arc>) -> Result { > + Ok(Self(Controller::new(pdev, iomem)?)) > + } > + > + /// Schedules reset work. > + #[expect(dead_code)] > + pub(crate) fn schedule(&self) { > + // TODO: Similar to `panthor_device_schedule_reset()` in = Panthor, add a > + // power management check once Tyr supports it. > + > + // Keep only one reset request running or queued. If one is = already pending, > + // we ignore new schedule requests. > + if self.0.pending.cmpxchg(false, true, Relaxed).is_ok() > + && self.0.wq.enqueue(self.0.clone()).is_err() > + { > + self.0.pending.store(false, Release); > + } > + } > + > + /// Returns true if a reset is queued or in progress. > + /// > + /// Note that the state can change immediately after the return. > + #[inline] > + #[expect(dead_code)] > + pub(crate) fn is_pending(&self) -> bool { > + self.0.pending.load(Acquire) > + } > +} > + > +impl Drop for ResetHandle { > + fn drop(&mut self) { > + // Drain queued/running work and block future queueing = attempts for this > + // work item before clocks/regulators are torn down. > + // SAFETY: drop executes in a sleepable context. > + unsafe { self.0.work.disable_sync() }; > + } > +} > + > +/// Issues a soft reset command and waits for reset-complete IRQ = status. > +fn issue_soft_reset(dev: &Device, iomem: &Devres) -> = Result { > + // Clear any stale reset-complete IRQ state before issuing a new = soft reset. > + regs::GPU_IRQ_CLEAR.write(dev, iomem, = regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?; > + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; > + > + poll::read_poll_timeout( > + || regs::GPU_IRQ_RAWSTAT.read(dev, iomem), > + |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED !=3D = 0, > + time::Delta::from_millis(1), > + time::Delta::from_millis(100), > + ) > + .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?; > + > + Ok(()) > +} > + > +/// Runs one synchronous GPU reset pass. > +/// > +/// Its visibility is `pub(super)` only so the probe path can run an > +/// initial reset; it is not part of this module's public API. > +/// > +/// On success, the GPU is left in a state suitable for = reinitialization. > +/// > +/// The reset sequence is as follows: > +/// 1. Trigger a GPU soft reset. > +/// 2. Wait for the reset-complete IRQ status. > +/// 3. Power L2 back on. > +pub(super) fn run_reset(dev: &Device, iomem: &Devres) = -> Result { > + issue_soft_reset(dev, iomem)?; > + gpu::l2_power_on(dev, iomem)?; > + Ok(()) > +} > diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs > index 18b0668bb217..d0349bc49f27 100644 > --- a/drivers/gpu/drm/tyr/tyr.rs > +++ b/drivers/gpu/drm/tyr/tyr.rs > @@ -14,6 +14,7 @@ > mod gpu; > mod mmu; > mod regs; > +mod reset; > mod slot; > mod vm; >=20 > --=20 > 2.51.2 >=20