public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/panthor: Fix the "done_fence is initialized" detection logic
@ 2026-03-09 10:30 Boris Brezillon
  2026-03-09 10:50 ` Christian König
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Boris Brezillon @ 2026-03-09 10:30 UTC (permalink / raw)
  To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
  Cc: dri-devel, kernel, Nicolas Frattaroli, Tvrtko Ursulin,
	Philipp Stanner, Christian König

After commit 541c8f2468b9 ("dma-buf: detach fence ops on signal v3"),
dma_fence::ops == NULL can't be used to check if the fence is initialized
or not. We could turn this into an "is_signaled() || ops == NULL" test,
but that's fragile, since it's still subject to dma_fence internal
changes. So let's have the "is_initialized" state encoded directly in
the pointer through the lowest bit which is guaranteed to be unused
because of the dma_fence alignment constraint.

Cc: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Philipp Stanner <phasta@kernel.org>
Cc: Christian König <christian.koenig@amd.com>
Reported-by: Steven Price <steven.price@arm.com>
Reported-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
Fixes: 541c8f2468b9 ("dma-buf: detach fence ops on signal v3")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 69 ++++++++++++++++++-------
 1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index bd703a2904a1..31589add86f5 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -835,8 +835,15 @@ struct panthor_job {
 	 */
 	struct list_head node;
 
-	/** @done_fence: Fence signaled when the job is finished or cancelled. */
-	struct dma_fence *done_fence;
+	/**
+	 * @done_fence: Fence signaled when the job is finished or cancelled.
+	 *
+	 * This is a uintptr_t because we use the lower bit to encode whether
+	 * the fence has been initialized or not, and we don't want code to dereference
+	 * this field directly (job_done_fence()/job_done_fence_initialized() should be used
+	 * instead).
+	 */
+	uintptr_t done_fence;
 
 	/** @profiling: Job profiling information. */
 	struct {
@@ -1518,6 +1525,18 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
 		 info);
 }
 
+#define DONE_FENCE_INITIALIZED BIT(0)
+
+static struct dma_fence *job_done_fence(struct panthor_job *job)
+{
+	return (void *)(job->done_fence & ~(uintptr_t)DONE_FENCE_INITIALIZED);
+}
+
+static bool job_done_fence_initialized(struct panthor_job *job)
+{
+	return job->done_fence & DONE_FENCE_INITIALIZED;
+}
+
 static void
 cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
 				   u32 csg_id, u32 cs_id)
@@ -1549,7 +1568,7 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
 			if (cs_extract < job->ringbuf.start)
 				break;
 
-			dma_fence_set_error(job->done_fence, -EINVAL);
+			dma_fence_set_error(job_done_fence(job), -EINVAL);
 		}
 		spin_unlock(&queue->fence_ctx.lock);
 	}
@@ -2182,9 +2201,11 @@ group_term_post_processing(struct panthor_group *group)
 
 		spin_lock(&queue->fence_ctx.lock);
 		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
+			struct dma_fence *done_fence = job_done_fence(job);
+
 			list_move_tail(&job->node, &faulty_jobs);
-			dma_fence_set_error(job->done_fence, err);
-			dma_fence_signal_locked(job->done_fence);
+			dma_fence_set_error(done_fence, err);
+			dma_fence_signal_locked(done_fence);
 		}
 		spin_unlock(&queue->fence_ctx.lock);
 
@@ -2734,7 +2755,7 @@ static void queue_start(struct panthor_queue *queue)
 
 	/* Re-assign the parent fences. */
 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
-		job->base.s_fence->parent = dma_fence_get(job->done_fence);
+		job->base.s_fence->parent = dma_fence_get(job_done_fence(job));
 
 	enable_delayed_work(&queue->timeout.work);
 	drm_sched_start(&queue->scheduler, 0);
@@ -3047,6 +3068,8 @@ static bool queue_check_job_completion(struct panthor_queue *queue)
 	cookie = dma_fence_begin_signalling();
 	spin_lock(&queue->fence_ctx.lock);
 	list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
+		struct dma_fence *done_fence = job_done_fence(job);
+
 		if (!syncobj) {
 			struct panthor_group *group = job->group;
 
@@ -3054,11 +3077,11 @@ static bool queue_check_job_completion(struct panthor_queue *queue)
 				  (job->queue_idx * sizeof(*syncobj));
 		}
 
-		if (syncobj->seqno < job->done_fence->seqno)
+		if (syncobj->seqno < done_fence->seqno)
 			break;
 
 		list_move_tail(&job->node, &done_jobs);
-		dma_fence_signal_locked(job->done_fence);
+		dma_fence_signal_locked(done_fence);
 	}
 
 	if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
@@ -3309,8 +3332,10 @@ queue_run_job(struct drm_sched_job *sched_job)
 	 * drm_sched_job::s_fence::finished fence.
 	 */
 	if (!job->call_info.size) {
-		job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
-		return dma_fence_get(job->done_fence);
+		done_fence = dma_fence_get(queue->fence_ctx.last_fence);
+
+		job->done_fence = (uintptr_t)done_fence | DONE_FENCE_INITIALIZED;
+		return dma_fence_get(done_fence);
 	}
 
 	ret = panthor_device_resume_and_get(ptdev);
@@ -3323,11 +3348,13 @@ queue_run_job(struct drm_sched_job *sched_job)
 		goto out_unlock;
 	}
 
-	dma_fence_init(job->done_fence,
+	done_fence = job_done_fence(job);
+	dma_fence_init(done_fence,
 		       &panthor_queue_fence_ops,
 		       &queue->fence_ctx.lock,
 		       queue->fence_ctx.id,
 		       atomic64_inc_return(&queue->fence_ctx.seqno));
+	job->done_fence |= DONE_FENCE_INITIALIZED;
 
 	job->profiling.slot = queue->profiling.seqno++;
 	if (queue->profiling.seqno == queue->profiling.slot_count)
@@ -3381,9 +3408,9 @@ queue_run_job(struct drm_sched_job *sched_job)
 
 	/* Update the last fence. */
 	dma_fence_put(queue->fence_ctx.last_fence);
-	queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
+	queue->fence_ctx.last_fence = dma_fence_get(done_fence);
 
-	done_fence = dma_fence_get(job->done_fence);
+	done_fence = dma_fence_get(done_fence);
 
 out_unlock:
 	mutex_unlock(&sched->lock);
@@ -3403,7 +3430,7 @@ queue_timedout_job(struct drm_sched_job *sched_job)
 	struct panthor_queue *queue = group->queues[job->queue_idx];
 
 	drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n",
-		 group->task_info.pid, group->task_info.comm, job->done_fence->seqno);
+		 group->task_info.pid, group->task_info.comm, job_done_fence(job)->seqno);
 
 	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
 
@@ -3915,10 +3942,10 @@ static void job_release(struct kref *ref)
 	if (job->base.s_fence)
 		drm_sched_job_cleanup(&job->base);
 
-	if (job->done_fence && job->done_fence->ops)
-		dma_fence_put(job->done_fence);
+	if (job_done_fence_initialized(job))
+		dma_fence_put(job_done_fence(job));
 	else
-		dma_fence_free(job->done_fence);
+		dma_fence_free(job_done_fence(job));
 
 	group_put(job->group);
 
@@ -4011,11 +4038,15 @@ panthor_job_create(struct panthor_file *pfile,
 	 * the previously submitted job.
 	 */
 	if (job->call_info.size) {
-		job->done_fence = kzalloc_obj(*job->done_fence);
-		if (!job->done_fence) {
+		struct dma_fence *done_fence;
+
+		done_fence = kzalloc_obj(*done_fence);
+		if (!done_fence) {
 			ret = -ENOMEM;
 			goto err_put_job;
 		}
+
+		job->done_fence = (uintptr_t)done_fence;
 	}
 
 	job->profiling.mask = pfile->ptdev->profile_mask;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v2] drm/panthor: Fix the "done_fence is initialized" detection logic
@ 2026-03-09 12:43 Boris Brezillon
  2026-03-10  2:23 ` Claude review: " Claude Code Review Bot
  2026-03-10  2:23 ` Claude Code Review Bot
  0 siblings, 2 replies; 12+ messages in thread
From: Boris Brezillon @ 2026-03-09 12:43 UTC (permalink / raw)
  To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
  Cc: dri-devel, kernel, Nicolas Frattaroli, Tvrtko Ursulin,
	Philipp Stanner, Christian König

After commit 541c8f2468b9 ("dma-buf: detach fence ops on signal v3"),
dma_fence::ops == NULL can't be used to check if the fence is initialized.
Use dma_fence_was_initialized() instead.

v2:
- Use dma_fence_was_initialized() instead of open-coding it

Cc: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Philipp Stanner <phasta@kernel.org>
Cc: Christian König <christian.koenig@amd.com>
Reported-by: Steven Price <steven.price@arm.com>
Reported-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
Fixes: 541c8f2468b9 ("dma-buf: detach fence ops on signal v3")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index bd703a2904a1..c15941ebe07a 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -3915,7 +3915,7 @@ static void job_release(struct kref *ref)
 	if (job->base.s_fence)
 		drm_sched_job_cleanup(&job->base);
 
-	if (job->done_fence && job->done_fence->ops)
+	if (dma_fence_was_initialized(job->done_fence))
 		dma_fence_put(job->done_fence);
 	else
 		dma_fence_free(job->done_fence);
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2026-03-10  2:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-09 10:30 [PATCH] drm/panthor: Fix the "done_fence is initialized" detection logic Boris Brezillon
2026-03-09 10:50 ` Christian König
2026-03-09 11:06   ` Boris Brezillon
2026-03-09 11:05 ` Liviu Dudau
2026-03-09 13:15   ` Boris Brezillon
2026-03-09 14:54     ` Liviu Dudau
2026-03-09 15:32       ` Boris Brezillon
2026-03-09 11:06 ` Nicolas Frattaroli
2026-03-10  2:25 ` Claude review: " Claude Code Review Bot
2026-03-10  2:25 ` Claude Code Review Bot
  -- strict thread matches above, loose matches on Subject: below --
2026-03-09 12:43 [PATCH v2] " Boris Brezillon
2026-03-10  2:23 ` Claude review: " Claude Code Review Bot
2026-03-10  2:23 ` Claude Code Review Bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox