public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
From: Christian König <christian.koenig@amd.com>
To: Boris Brezillon <boris.brezillon@collabora.com>,
	Steven Price <steven.price@arm.com>,
	Liviu Dudau <liviu.dudau@arm.com>,
	Adrián Larumbe <adrian.larumbe@collabora.com>
Cc: dri-devel@lists.freedesktop.org, kernel@collabora.com,
	Nicolas Frattaroli <nicolas.frattaroli@collabora.com>,
	Tvrtko Ursulin <tvrtko.ursulin@igalia.com>,
	Philipp Stanner <phasta@kernel.org>
Subject: Re: [PATCH] drm/panthor: Fix the "done_fence is initialized" detection logic
Date: Mon, 9 Mar 2026 11:50:16 +0100	[thread overview]
Message-ID: <bd0bcaeb-0a0f-4415-b964-ea1d5219b671@amd.com> (raw)
In-Reply-To: <20260309103053.211415-1-boris.brezillon@collabora.com>

On 3/9/26 11:30, Boris Brezillon wrote:
> After commit 541c8f2468b9 ("dma-buf: detach fence ops on signal v3"),
> dma_fence::ops == NULL can't be used to check if the fence is initialized
> or not. We could turn this into an "is_signaled() || ops == NULL" test,
> but that's fragile, since it's still subject to dma_fence internal
> changes. So let's have the "is_initialized" state encoded directly in
> the pointer through the lowest bit which is guaranteed to be unused
> because of the dma_fence alignment constraint.

We added the dma_fence_was_initialized() function exactly for this use case and fixed all callers.

But it looks like this occurrence somehow fell through the cracks; no idea why my script for NULL checks of dma_fence->ops didn't trigger here.

Regards,
Christian.

> 
> Cc: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Philipp Stanner <phasta@kernel.org>
> Cc: Christian König <christian.koenig@amd.com>
> Reported-by: Steven Price <steven.price@arm.com>
> Reported-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> Fixes: 541c8f2468b9 ("dma-buf: detach fence ops on signal v3")
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_sched.c | 69 ++++++++++++++++++-------
>  1 file changed, 50 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index bd703a2904a1..31589add86f5 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -835,8 +835,15 @@ struct panthor_job {
>  	 */
>  	struct list_head node;
>  
> -	/** @done_fence: Fence signaled when the job is finished or cancelled. */
> -	struct dma_fence *done_fence;
> +	/**
> +	 * @done_fence: Fence signaled when the job is finished or cancelled.
> +	 *
> +	 * This is a uintptr_t because we use the lower bit to encode whether
> +	 * the fence has been initialized or not, and we don't want code to dereference
> +	 * this field directly (job_done_fence()/job_done_fence_initialized() should be used
> +	 * instead).
> +	 */
> +	uintptr_t done_fence;
>  
>  	/** @profiling: Job profiling information. */
>  	struct {
> @@ -1518,6 +1525,18 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
>  		 info);
>  }
>  
> +#define DONE_FENCE_INITIALIZED BIT(0)
> +
> +static struct dma_fence *job_done_fence(struct panthor_job *job)
> +{
> +	return (void *)(job->done_fence & ~(uintptr_t)DONE_FENCE_INITIALIZED);
> +}
> +
> +static bool job_done_fence_initialized(struct panthor_job *job)
> +{
> +	return job->done_fence & DONE_FENCE_INITIALIZED;
> +}
> +
>  static void
>  cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
>  				   u32 csg_id, u32 cs_id)
> @@ -1549,7 +1568,7 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
>  			if (cs_extract < job->ringbuf.start)
>  				break;
>  
> -			dma_fence_set_error(job->done_fence, -EINVAL);
> +			dma_fence_set_error(job_done_fence(job), -EINVAL);
>  		}
>  		spin_unlock(&queue->fence_ctx.lock);
>  	}
> @@ -2182,9 +2201,11 @@ group_term_post_processing(struct panthor_group *group)
>  
>  		spin_lock(&queue->fence_ctx.lock);
>  		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
> +			struct dma_fence *done_fence = job_done_fence(job);
> +
>  			list_move_tail(&job->node, &faulty_jobs);
> -			dma_fence_set_error(job->done_fence, err);
> -			dma_fence_signal_locked(job->done_fence);
> +			dma_fence_set_error(done_fence, err);
> +			dma_fence_signal_locked(done_fence);
>  		}
>  		spin_unlock(&queue->fence_ctx.lock);
>  
> @@ -2734,7 +2755,7 @@ static void queue_start(struct panthor_queue *queue)
>  
>  	/* Re-assign the parent fences. */
>  	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
> -		job->base.s_fence->parent = dma_fence_get(job->done_fence);
> +		job->base.s_fence->parent = dma_fence_get(job_done_fence(job));
>  
>  	enable_delayed_work(&queue->timeout.work);
>  	drm_sched_start(&queue->scheduler, 0);
> @@ -3047,6 +3068,8 @@ static bool queue_check_job_completion(struct panthor_queue *queue)
>  	cookie = dma_fence_begin_signalling();
>  	spin_lock(&queue->fence_ctx.lock);
>  	list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
> +		struct dma_fence *done_fence = job_done_fence(job);
> +
>  		if (!syncobj) {
>  			struct panthor_group *group = job->group;
>  
> @@ -3054,11 +3077,11 @@ static bool queue_check_job_completion(struct panthor_queue *queue)
>  				  (job->queue_idx * sizeof(*syncobj));
>  		}
>  
> -		if (syncobj->seqno < job->done_fence->seqno)
> +		if (syncobj->seqno < done_fence->seqno)
>  			break;
>  
>  		list_move_tail(&job->node, &done_jobs);
> -		dma_fence_signal_locked(job->done_fence);
> +		dma_fence_signal_locked(done_fence);
>  	}
>  
>  	if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
> @@ -3309,8 +3332,10 @@ queue_run_job(struct drm_sched_job *sched_job)
>  	 * drm_sched_job::s_fence::finished fence.
>  	 */
>  	if (!job->call_info.size) {
> -		job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
> -		return dma_fence_get(job->done_fence);
> +		done_fence = dma_fence_get(queue->fence_ctx.last_fence);
> +
> +		job->done_fence = (uintptr_t)done_fence | DONE_FENCE_INITIALIZED;
> +		return dma_fence_get(done_fence);
>  	}
>  
>  	ret = panthor_device_resume_and_get(ptdev);
> @@ -3323,11 +3348,13 @@ queue_run_job(struct drm_sched_job *sched_job)
>  		goto out_unlock;
>  	}
>  
> -	dma_fence_init(job->done_fence,
> +	done_fence = job_done_fence(job);
> +	dma_fence_init(done_fence,
>  		       &panthor_queue_fence_ops,
>  		       &queue->fence_ctx.lock,
>  		       queue->fence_ctx.id,
>  		       atomic64_inc_return(&queue->fence_ctx.seqno));
> +	job->done_fence |= DONE_FENCE_INITIALIZED;
>  
>  	job->profiling.slot = queue->profiling.seqno++;
>  	if (queue->profiling.seqno == queue->profiling.slot_count)
> @@ -3381,9 +3408,9 @@ queue_run_job(struct drm_sched_job *sched_job)
>  
>  	/* Update the last fence. */
>  	dma_fence_put(queue->fence_ctx.last_fence);
> -	queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
> +	queue->fence_ctx.last_fence = dma_fence_get(done_fence);
>  
> -	done_fence = dma_fence_get(job->done_fence);
> +	done_fence = dma_fence_get(done_fence);
>  
>  out_unlock:
>  	mutex_unlock(&sched->lock);
> @@ -3403,7 +3430,7 @@ queue_timedout_job(struct drm_sched_job *sched_job)
>  	struct panthor_queue *queue = group->queues[job->queue_idx];
>  
>  	drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n",
> -		 group->task_info.pid, group->task_info.comm, job->done_fence->seqno);
> +		 group->task_info.pid, group->task_info.comm, job_done_fence(job)->seqno);
>  
>  	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
>  
> @@ -3915,10 +3942,10 @@ static void job_release(struct kref *ref)
>  	if (job->base.s_fence)
>  		drm_sched_job_cleanup(&job->base);
>  
> -	if (job->done_fence && job->done_fence->ops)
> -		dma_fence_put(job->done_fence);
> +	if (job_done_fence_initialized(job))
> +		dma_fence_put(job_done_fence(job));
>  	else
> -		dma_fence_free(job->done_fence);
> +		dma_fence_free(job_done_fence(job));
>  
>  	group_put(job->group);
>  
> @@ -4011,11 +4038,15 @@ panthor_job_create(struct panthor_file *pfile,
>  	 * the previously submitted job.
>  	 */
>  	if (job->call_info.size) {
> -		job->done_fence = kzalloc_obj(*job->done_fence);
> -		if (!job->done_fence) {
> +		struct dma_fence *done_fence;
> +
> +		done_fence = kzalloc_obj(*done_fence);
> +		if (!done_fence) {
>  			ret = -ENOMEM;
>  			goto err_put_job;
>  		}
> +
> +		job->done_fence = (uintptr_t)done_fence;
>  	}
>  
>  	job->profiling.mask = pfile->ptdev->profile_mask;


  reply	other threads:[~2026-03-09 10:50 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-09 10:30 [PATCH] drm/panthor: Fix the "done_fence is initialized" detection logic Boris Brezillon
2026-03-09 10:50 ` Christian König [this message]
2026-03-09 11:06   ` Boris Brezillon
2026-03-09 11:05 ` Liviu Dudau
2026-03-09 13:15   ` Boris Brezillon
2026-03-09 14:54     ` Liviu Dudau
2026-03-09 15:32       ` Boris Brezillon
2026-03-09 11:06 ` Nicolas Frattaroli
2026-03-10  2:25 ` Claude review: " Claude Code Review Bot
2026-03-10  2:25 ` Claude Code Review Bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bd0bcaeb-0a0f-4415-b964-ea1d5219b671@amd.com \
    --to=christian.koenig@amd.com \
    --cc=adrian.larumbe@collabora.com \
    --cc=boris.brezillon@collabora.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=kernel@collabora.com \
    --cc=liviu.dudau@arm.com \
    --cc=nicolas.frattaroli@collabora.com \
    --cc=phasta@kernel.org \
    --cc=steven.price@arm.com \
    --cc=tvrtko.ursulin@igalia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox