public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
From: Adrián Larumbe <adrian.larumbe@collabora.com>
To: Boris Brezillon <boris.brezillon@collabora.com>,
	Rob Herring <robh@kernel.org>,
	Steven Price <steven.price@arm.com>,
	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	Maxime Ripard <mripard@kernel.org>,
	Thomas Zimmermann <tzimmermann@suse.de>,
	David Airlie <airlied@gmail.com>, Simona Vetter <simona@ffwll.ch>,
	Faith Ekstrand <faith.ekstrand@collabora.com>,
	"Marty E. Plummer" <hanetzer@startmail.com>,
	Tomeu Vizoso <tomeu@tomeuvizoso.net>,
	Eric Anholt <eric@anholt.net>,
	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>,
	Robin Murphy <robin.murphy@arm.com>
Cc: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org,
	Collabora Kernel Team <kernel@collabora.com>,
	Adrián Larumbe <adrian.larumbe@collabora.com>,
	Neil Armstrong <neil.armstrong@linaro.org>,
	Claude <noreply@anthropic.com>
Subject: [PATCH v2 5/7] drm/panfrost: Make reset sequence deal with an active HWPerf session
Date: Thu, 04 Jun 2026 18:35:24 +0100	[thread overview]
Message-ID: <20260604-claude-fixes-v2-5-57c6bd4c1655@collabora.com> (raw)
In-Reply-To: <20260604-claude-fixes-v2-0-57c6bd4c1655@collabora.com>

Right now, if there's a HW reset and an HWPerf session is active,
panfrost_mmu_reset() will reset the AS count for every single open file's
mmu struct back to 0, and also invalidate their AS numbers. Then, when
disabling hwperf, panfrost_mmu_as_put() will WARN that mmu->as_count is
less than zero.

Fix this by introducing a perfcnt HW reset path.

The choice was made to render perfcnt unusable after a reset, so that the
user has to reprogram it with a full disable/enable sequence before
requesting more perfcnt dumps (dump requests fail with -EIO until then).

Reported-by: Claude <noreply@anthropic.com>
Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/88
Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Fixes: 7786fd108777 ("drm/panfrost: Expose performance counters through unstable ioctls")
---
 drivers/gpu/drm/panfrost/panfrost_device.c  |  1 +
 drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 46 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/panfrost/panfrost_perfcnt.h |  1 +
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 87b372c9e675..2805d50c1b9b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -426,6 +426,7 @@ bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev,
 
 void panfrost_device_reset(struct panfrost_device *pfdev, bool enable_job_int)
 {
+	panfrost_perfcnt_reset(pfdev);
 	panfrost_gpu_soft_reset(pfdev);
 
 	panfrost_gpu_power_on(pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
index ad1156678e91..c2087ea705fe 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -33,6 +33,7 @@ struct panfrost_perfcnt {
 	struct panfrost_file_priv *user;
 	struct mutex lock;
 	struct completion dump_comp;
+	atomic_t hw_reset_happened;
 };
 
 static void panfrost_perfcnt_gpu_disable(struct panfrost_device *pfdev)
@@ -57,9 +58,13 @@ void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
 
 static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
 {
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
 	u64 gpuva;
 	int ret;
 
+	if (atomic_read(&perfcnt->hw_reset_happened))
+		return -EIO;
+
 	reinit_completion(&pfdev->perfcnt->dump_comp);
 	gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT;
 	gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
@@ -140,6 +145,15 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
 		goto err_vunmap;
 	}
 
+	/* If a reset is ongoing, the AS we get right below will be torn
+	 * down, so rather than waiting until this becomes obvious in a
+	 * perfcnt_dump() ioctl, we ask the user to try again slightly later.
+	 */
+	if (atomic_read(&pfdev->reset.pending)) {
+		ret = -EAGAIN;
+		goto err_vunmap;
+	}
+
 	ret = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
 	if (ret < 0)
 		goto err_vunmap;
@@ -173,6 +187,16 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
 	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
 		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
 
+	/* If a reset happened, we can't tell whether it was before perfcnt_enable() or after we
+	 * called panfrost_mmu_as_get(), so we can't just consume the flag and carry on. Clear it
+	 * and return -EAGAIN (a negative errno, like the reset.pending check above) so that the
+	 * user retries, in the hope that no reset happens before the next ioctl invocation.
+	 */
+	if (atomic_cmpxchg(&perfcnt->hw_reset_happened, 1, 0)) {
+		ret = -EAGAIN;
+		goto err_disable;
+	}
+
 	/* The BO ref is retained by the mapping. */
 	drm_gem_object_put(&bo->base);
 
@@ -180,6 +204,8 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
 
 	return 0;
 
+err_disable:
+	panfrost_perfcnt_gpu_disable(pfdev);
 err_vunmap:
 	drm_gem_vunmap(&bo->base, &map);
 err_put_mapping:
@@ -209,7 +235,8 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
 	drm_gem_vunmap(&perfcnt->mapping->obj->base.base, &map);
 	perfcnt->buf = NULL;
 	panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
-	panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
+	if (!atomic_read(&perfcnt->hw_reset_happened))
+		panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
 	panfrost_gem_mapping_put(perfcnt->mapping);
 	perfcnt->mapping = NULL;
 	pm_runtime_put_autosuspend(pfdev->base.dev);
@@ -346,3 +373,20 @@ void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
 	/* Disable everything before leaving. */
 	panfrost_perfcnt_gpu_disable(pfdev);
 }
+
+void panfrost_perfcnt_reset(struct panfrost_device *pfdev)
+{
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+
+	/* Since this function will be called either from a scheduled HW reset
+	 * or a runtime resume, tearing down any perfcnt resources means we're
+	 * doomed to deadlock with perfcnt_{enable/disable}, since we'd have
+	 * to take the perfcnt lock. On top of that, it'd also violate DMA fence
+	 * signalling rules because GFP_KERNEL allocations are made with the perfcnt
+	 * lock taken in perfcnt_enable. In light of this, the only thing we can do
+	 * is disable perfcnt unconditionally, and notify the perfcnt user of
+	 * the reset having happened so that they can take recovery measures.
+	 */
+	panfrost_perfcnt_gpu_disable(pfdev);
+	atomic_set(&perfcnt->hw_reset_happened, 1);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
index 8bbcf5f5fb33..8b9bc704b634 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
@@ -14,5 +14,6 @@ int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+void panfrost_perfcnt_reset(struct panfrost_device *pfdev);
 
 #endif

-- 
2.53.0


  parent reply	other threads:[~2026-06-04 17:51 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 17:35 [PATCH v2 0/7] RPM, perfcnt and other minor fixes for Panfrost Adrián Larumbe
2026-06-04 17:35 ` [PATCH v2 1/7] drm/panfrost: Check another bo field for cache option query Adrián Larumbe
2026-06-04 17:57   ` Boris Brezillon
2026-06-04 20:16   ` Claude review: " Claude Code Review Bot
2026-06-04 17:35 ` [PATCH v2 2/7] drm/panfrost: Prevent division by 0 Adrián Larumbe
2026-06-04 17:44   ` sashiko-bot
2026-06-04 20:16     ` Claude review: " Claude Code Review Bot
2026-06-04 18:02   ` Boris Brezillon
2026-06-04 17:35 ` [PATCH v2 3/7] drm/panfrost: Move shrinker initialization and unplug one level down Adrián Larumbe
2026-06-04 18:04   ` Boris Brezillon
2026-06-04 20:16   ` Claude review: " Claude Code Review Bot
2026-06-04 17:35 ` [PATCH v2 4/7] drm/panfrost: Move perfcnt GPU disable sequence into a helper Adrián Larumbe
2026-06-04 17:47   ` sashiko-bot
2026-06-04 20:16     ` Claude review: " Claude Code Review Bot
2026-06-04 18:05   ` Boris Brezillon
2026-06-04 17:35 ` Adrián Larumbe [this message]
2026-06-04 17:49   ` [PATCH v2 5/7] drm/panfrost: Make reset sequence deal with an active HWPerf session sashiko-bot
2026-06-04 20:16     ` Claude review: " Claude Code Review Bot
2026-06-04 18:26   ` Boris Brezillon
2026-06-04 17:35 ` [PATCH v2 6/7] drm/panfrost: Fix PM usage_count mishandling Adrián Larumbe
2026-06-04 17:50   ` sashiko-bot
2026-06-04 20:16     ` Claude review: " Claude Code Review Bot
2026-06-04 18:36   ` Boris Brezillon
2026-06-04 17:35 ` [PATCH v2 7/7] drm/panfrost: Explicitly enable MMU interrupts at device init Adrián Larumbe
2026-06-04 17:55   ` sashiko-bot
2026-06-04 20:16   ` Claude review: " Claude Code Review Bot
2026-06-04 20:16 ` Claude review: RPM, perfcnt and other minor fixes for Panfrost Claude Code Review Bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604-claude-fixes-v2-5-57c6bd4c1655@collabora.com \
    --to=adrian.larumbe@collabora.com \
    --cc=airlied@gmail.com \
    --cc=alyssa.rosenzweig@collabora.com \
    --cc=boris.brezillon@collabora.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=eric@anholt.net \
    --cc=faith.ekstrand@collabora.com \
    --cc=hanetzer@startmail.com \
    --cc=kernel@collabora.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=neil.armstrong@linaro.org \
    --cc=noreply@anthropic.com \
    --cc=robh@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=simona@ffwll.ch \
    --cc=steven.price@arm.com \
    --cc=tomeu@tomeuvizoso.net \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox