public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job
@ 2026-04-26 10:37 gyeyoung baek
  2026-04-28  5:18 ` Claude review: " Claude Code Review Bot
  2026-04-28  5:18 ` Claude Code Review Bot
  0 siblings, 2 replies; 3+ messages in thread
From: gyeyoung baek @ 2026-04-26 10:37 UTC (permalink / raw)
  To: Tomeu Vizoso, Oded Gabbay, dri-devel, linux-kernel; +Cc: gyeyoung baek

The drm_mm and its mutex live in rocket_file_priv, which
rocket_postclose() frees on fd close.  But a BO held by an
in-flight job needs to access them later -- when its destructor
runs from the drm_sched free_job worker after the NPU IRQ --
and that access hits freed memory.

  BUG: KASAN: slab-use-after-free in __mutex_trylock_common+0x90/0x1e8
  Workqueue: <NPU>.npu drm_sched_free_job_work [gpu_sched]
  Call trace:
    __mutex_lock
    rocket_gem_bo_free
    rocket_job_cleanup
    rocket_job_free
    drm_sched_free_job_work [gpu_sched]

Move drm_mm and the mutex out of rocket_file_priv into the
kref-managed rocket_iommu_domain (renamed to rocket_vm).
Their lifetime now follows the vm: while any job references
the vm, the address-space state stays alive.

Fixes: ed98261b4168 ("accel/rocket: Add a new driver for Rockchip's NPU")
Signed-off-by: gyeyoung baek <gye976@gmail.com>
---
 drivers/accel/rocket/rocket_drv.c | 74 +++++++++++++++----------------
 drivers/accel/rocket/rocket_drv.h | 13 +++---
 drivers/accel/rocket/rocket_gem.c | 29 ++++++------
 drivers/accel/rocket/rocket_gem.h |  4 +-
 drivers/accel/rocket/rocket_job.c |  6 +--
 drivers/accel/rocket/rocket_job.h |  2 +-
 6 files changed, 63 insertions(+), 65 deletions(-)

diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
index 8bbbce594..bddcfc0ff 100644
--- a/drivers/accel/rocket/rocket_drv.c
+++ b/drivers/accel/rocket/rocket_drv.c
@@ -26,46 +26,54 @@ static struct platform_device *drm_dev;
 static struct rocket_device *rdev;
 
 static void
-rocket_iommu_domain_destroy(struct kref *kref)
+rocket_vm_destroy(struct kref *kref)
 {
-	struct rocket_iommu_domain *domain = container_of(kref, struct rocket_iommu_domain, kref);
+	struct rocket_vm *vm = container_of(kref, struct rocket_vm, kref);
 
-	iommu_domain_free(domain->domain);
-	domain->domain = NULL;
-	kfree(domain);
+	drm_mm_takedown(&vm->mm);
+	mutex_destroy(&vm->lock);
+	iommu_domain_free(vm->domain);
+	vm->domain = NULL;
+	kfree(vm);
 }
 
-static struct rocket_iommu_domain*
-rocket_iommu_domain_create(struct device *dev)
+static struct rocket_vm *
+rocket_vm_create(struct device *dev)
 {
-	struct rocket_iommu_domain *domain = kmalloc_obj(*domain);
+	struct rocket_vm *vm = kmalloc_obj(*vm);
+	u64 start, end;
 	void *err;
 
-	if (!domain)
+	if (!vm)
 		return ERR_PTR(-ENOMEM);
 
-	domain->domain = iommu_paging_domain_alloc(dev);
-	if (IS_ERR(domain->domain)) {
-		err = ERR_CAST(domain->domain);
-		kfree(domain);
+	vm->domain = iommu_paging_domain_alloc(dev);
+	if (IS_ERR(vm->domain)) {
+		err = ERR_CAST(vm->domain);
+		kfree(vm);
 		return err;
 	}
-	kref_init(&domain->kref);
 
-	return domain;
+	start = vm->domain->geometry.aperture_start;
+	end = vm->domain->geometry.aperture_end;
+	drm_mm_init(&vm->mm, start, end - start + 1);
+	mutex_init(&vm->lock);
+	kref_init(&vm->kref);
+
+	return vm;
 }
 
-struct rocket_iommu_domain *
-rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv)
+struct rocket_vm *
+rocket_vm_get(struct rocket_file_priv *rocket_priv)
 {
-	kref_get(&rocket_priv->domain->kref);
-	return rocket_priv->domain;
+	kref_get(&rocket_priv->vm->kref);
+	return rocket_priv->vm;
 }
 
 void
-rocket_iommu_domain_put(struct rocket_iommu_domain *domain)
+rocket_vm_put(struct rocket_vm *vm)
 {
-	kref_put(&domain->kref, rocket_iommu_domain_destroy);
+	kref_put(&vm->kref, rocket_vm_destroy);
 }
 
 static int
@@ -73,7 +81,6 @@ rocket_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct rocket_device *rdev = to_rocket_device(dev);
 	struct rocket_file_priv *rocket_priv;
-	u64 start, end;
 	int ret;
 
 	if (!try_module_get(THIS_MODULE))
@@ -86,29 +93,22 @@ rocket_open(struct drm_device *dev, struct drm_file *file)
 	}
 
 	rocket_priv->rdev = rdev;
-	rocket_priv->domain = rocket_iommu_domain_create(rdev->cores[0].dev);
-	if (IS_ERR(rocket_priv->domain)) {
-		ret = PTR_ERR(rocket_priv->domain);
+	rocket_priv->vm = rocket_vm_create(rdev->cores[0].dev);
+	if (IS_ERR(rocket_priv->vm)) {
+		ret = PTR_ERR(rocket_priv->vm);
 		goto err_free;
 	}
 
 	file->driver_priv = rocket_priv;
 
-	start = rocket_priv->domain->domain->geometry.aperture_start;
-	end = rocket_priv->domain->domain->geometry.aperture_end;
-	drm_mm_init(&rocket_priv->mm, start, end - start + 1);
-	mutex_init(&rocket_priv->mm_lock);
-
 	ret = rocket_job_open(rocket_priv);
 	if (ret)
-		goto err_mm_takedown;
+		goto err_vm_put;
 
 	return 0;
 
-err_mm_takedown:
-	mutex_destroy(&rocket_priv->mm_lock);
-	drm_mm_takedown(&rocket_priv->mm);
-	rocket_iommu_domain_put(rocket_priv->domain);
+err_vm_put:
+	rocket_vm_put(rocket_priv->vm);
 err_free:
 	kfree(rocket_priv);
 err_put_mod:
@@ -122,9 +122,7 @@ rocket_postclose(struct drm_device *dev, struct drm_file *file)
 	struct rocket_file_priv *rocket_priv = file->driver_priv;
 
 	rocket_job_close(rocket_priv);
-	mutex_destroy(&rocket_priv->mm_lock);
-	drm_mm_takedown(&rocket_priv->mm);
-	rocket_iommu_domain_put(rocket_priv->domain);
+	rocket_vm_put(rocket_priv->vm);
 	kfree(rocket_priv);
 	module_put(THIS_MODULE);
 }
diff --git a/drivers/accel/rocket/rocket_drv.h b/drivers/accel/rocket/rocket_drv.h
index 2c673bb99..2754f46f1 100644
--- a/drivers/accel/rocket/rocket_drv.h
+++ b/drivers/accel/rocket/rocket_drv.h
@@ -11,22 +11,23 @@
 
 extern const struct dev_pm_ops rocket_pm_ops;
 
-struct rocket_iommu_domain {
+struct rocket_vm {
 	struct iommu_domain *domain;
+	struct drm_mm mm;
+	/* protects @mm */
+	struct mutex lock;
 	struct kref kref;
 };
 
 struct rocket_file_priv {
 	struct rocket_device *rdev;
 
-	struct rocket_iommu_domain *domain;
-	struct drm_mm mm;
-	struct mutex mm_lock;
+	struct rocket_vm *vm;
 
 	struct drm_sched_entity sched_entity;
 };
 
-struct rocket_iommu_domain *rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv);
-void rocket_iommu_domain_put(struct rocket_iommu_domain *domain);
+struct rocket_vm *rocket_vm_get(struct rocket_file_priv *rocket_priv);
+void rocket_vm_put(struct rocket_vm *vm);
 
 #endif
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
index b6a385d2e..7f64134fd 100644
--- a/drivers/accel/rocket/rocket_gem.c
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -14,20 +14,20 @@
 static void rocket_gem_bo_free(struct drm_gem_object *obj)
 {
 	struct rocket_gem_object *bo = to_rocket_bo(obj);
-	struct rocket_file_priv *rocket_priv = bo->driver_priv;
+	struct rocket_vm *vm = bo->vm;
 	size_t unmapped;
 
 	drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
 
-	unmapped = iommu_unmap(bo->domain->domain, bo->mm.start, bo->size);
+	unmapped = iommu_unmap(vm->domain, bo->mm.start, bo->size);
 	drm_WARN_ON(obj->dev, unmapped != bo->size);
 
-	mutex_lock(&rocket_priv->mm_lock);
+	mutex_lock(&vm->lock);
 	drm_mm_remove_node(&bo->mm);
-	mutex_unlock(&rocket_priv->mm_lock);
+	mutex_unlock(&vm->lock);
 
-	rocket_iommu_domain_put(bo->domain);
-	bo->domain = NULL;
+	rocket_vm_put(vm);
+	bo->vm = NULL;
 
 	drm_gem_shmem_free(&bo->base);
 }
@@ -64,6 +64,7 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 	struct drm_gem_shmem_object *shmem_obj;
 	struct rocket_gem_object *rkt_obj;
 	struct drm_gem_object *gem_obj;
+	struct rocket_vm *vm;
 	struct sg_table *sgt;
 	int ret;
 
@@ -74,8 +75,8 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 	gem_obj = &shmem_obj->base;
 	rkt_obj = to_rocket_bo(gem_obj);
 
-	rkt_obj->driver_priv = rocket_priv;
-	rkt_obj->domain = rocket_iommu_domain_get(rocket_priv);
+	vm = rocket_vm_get(rocket_priv);
+	rkt_obj->vm = vm;
 	rkt_obj->size = args->size;
 	rkt_obj->offset = 0;
 
@@ -90,13 +91,13 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 		goto err;
 	}
 
-	mutex_lock(&rocket_priv->mm_lock);
-	ret = drm_mm_insert_node_generic(&rocket_priv->mm, &rkt_obj->mm,
+	mutex_lock(&vm->lock);
+	ret = drm_mm_insert_node_generic(&vm->mm, &rkt_obj->mm,
 					 rkt_obj->size, PAGE_SIZE,
 					 0, 0);
-	mutex_unlock(&rocket_priv->mm_lock);
+	mutex_unlock(&vm->lock);
 
-	ret = iommu_map_sgtable(rocket_priv->domain->domain,
+	ret = iommu_map_sgtable(vm->domain,
 				rkt_obj->mm.start,
 				shmem_obj->sgt,
 				IOMMU_READ | IOMMU_WRITE);
@@ -115,9 +116,9 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 	return 0;
 
 err_remove_node:
-	mutex_lock(&rocket_priv->mm_lock);
+	mutex_lock(&vm->lock);
 	drm_mm_remove_node(&rkt_obj->mm);
-	mutex_unlock(&rocket_priv->mm_lock);
+	mutex_unlock(&vm->lock);
 
 err:
 	drm_gem_shmem_object_free(gem_obj);
diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h
index 240430334..e1fbbd8cf 100644
--- a/drivers/accel/rocket/rocket_gem.h
+++ b/drivers/accel/rocket/rocket_gem.h
@@ -9,9 +9,7 @@
 struct rocket_gem_object {
 	struct drm_gem_shmem_object base;
 
-	struct rocket_file_priv *driver_priv;
-
-	struct rocket_iommu_domain *domain;
+	struct rocket_vm *vm;
 	struct drm_mm_node mm;
 	size_t size;
 	u32 offset;
diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c
index 2f1861f96..7695fca02 100644
--- a/drivers/accel/rocket/rocket_job.c
+++ b/drivers/accel/rocket/rocket_job.c
@@ -233,7 +233,7 @@ static void rocket_job_cleanup(struct kref *ref)
 						refcount);
 	unsigned int i;
 
-	rocket_iommu_domain_put(job->domain);
+	rocket_vm_put(job->vm);
 
 	dma_fence_put(job->done_fence);
 	dma_fence_put(job->inference_done_fence);
@@ -314,7 +314,7 @@ static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
 	if (ret < 0)
 		return fence;
 
-	ret = iommu_attach_group(job->domain->domain, core->iommu_group);
+	ret = iommu_attach_group(job->vm->domain, core->iommu_group);
 	if (ret < 0)
 		return fence;
 
@@ -573,7 +573,7 @@ static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file
 
 	rjob->out_bo_count = job->out_bo_handle_count;
 
-	rjob->domain = rocket_iommu_domain_get(file_priv);
+	rjob->vm = rocket_vm_get(file_priv);
 
 	ret = rocket_job_push(rjob);
 	if (ret)
diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h
index 4ae00feec..9373c3d02 100644
--- a/drivers/accel/rocket/rocket_job.h
+++ b/drivers/accel/rocket/rocket_job.h
@@ -36,7 +36,7 @@ struct rocket_job {
 	/* Fence to be signaled by IRQ handler when the job is complete. */
 	struct dma_fence *done_fence;
 
-	struct rocket_iommu_domain *domain;
+	struct rocket_vm *vm;
 
 	struct kref refcount;
 };
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Claude review: accel/rocket: Fix drm_mm UAF on close vs in-flight job
  2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
@ 2026-04-28  5:18 ` Claude Code Review Bot
  2026-04-28  5:18 ` Claude Code Review Bot
  1 sibling, 0 replies; 3+ messages in thread
From: Claude Code Review Bot @ 2026-04-28  5:18 UTC (permalink / raw)
  To: dri-devel-reviews

Overall Series Review

Subject: accel/rocket: Fix drm_mm UAF on close vs in-flight job
Author: gyeyoung baek <gye976@gmail.com>
Patches: 1
Reviewed: 2026-04-28T15:18:19.348685

---

This is a single-patch fix for a real use-after-free bug in the `accel/rocket` NPU driver. The core insight is correct: `drm_mm` and its mutex were embedded in `rocket_file_priv`, which is freed in `rocket_postclose()` on fd close — but BO destructors running from the scheduler's `free_job` worker still need to call `drm_mm_remove_node()` against that same allocator, causing a UAF. Moving these into the kref-managed `rocket_vm` struct (which jobs and BOs hold references to) is the right fix.

The rename from `rocket_iommu_domain` to `rocket_vm` is clean and improves clarity. The mechanical changes are consistent across all six files.

However, there are two pre-existing bugs that this patch carries forward without fixing, and one of them interacts with the new code. These should be addressed either here or in follow-up patches.

**Verdict**: Correct fix for the stated UAF, but needs at least the `drm_mm_insert_node_generic` error-handling issue fixed before merging.

---

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Claude review: accel/rocket: Fix drm_mm UAF on close vs in-flight job
  2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
  2026-04-28  5:18 ` Claude review: " Claude Code Review Bot
@ 2026-04-28  5:18 ` Claude Code Review Bot
  1 sibling, 0 replies; 3+ messages in thread
From: Claude Code Review Bot @ 2026-04-28  5:18 UTC (permalink / raw)
  To: dri-devel-reviews

Patch Review

**Positive aspects:**

1. The commit message is excellent — it includes the KASAN splat, clearly explains the root cause and the fix approach, and has a proper `Fixes:` tag.

2. The lifetime model is now correct: `rocket_vm` is kref-managed, and both `rocket_gem_object` (via `bo->vm`) and `rocket_job` (via `job->vm`) hold references, so the `drm_mm` stays alive as long as any BO or job needs it.

3. The `rocket_vm_destroy()` correctly tears down in the right order — `drm_mm_takedown`, `mutex_destroy`, then `iommu_domain_free`:
```c
drm_mm_takedown(&vm->mm);
mutex_destroy(&vm->lock);
iommu_domain_free(vm->domain);
vm->domain = NULL;
kfree(vm);
```

4. Initializing `drm_mm` inside `rocket_vm_create()` from the IOMMU geometry aperture is cleaner than the previous approach in `rocket_open()`:
```c
start = vm->domain->geometry.aperture_start;
end = vm->domain->geometry.aperture_end;
drm_mm_init(&vm->mm, start, end - start + 1);
```

**Issues:**

1. **Bug (pre-existing, carried forward): Missing error check on `drm_mm_insert_node_generic`** — In `rocket_gem.c:94-98`, the return value from `drm_mm_insert_node_generic()` is assigned to `ret`, but it is immediately overwritten by the `iommu_map_sgtable()` call on line 100 without checking for failure:
```c
mutex_lock(&vm->lock);
ret = drm_mm_insert_node_generic(&vm->mm, &rkt_obj->mm,
				 rkt_obj->size, PAGE_SIZE,
				 0, 0);
mutex_unlock(&vm->lock);

ret = iommu_map_sgtable(vm->domain,
			rkt_obj->mm.start,
			shmem_obj->sgt,
			IOMMU_READ | IOMMU_WRITE);
```
If `drm_mm_insert_node_generic()` fails, `rkt_obj->mm` is not allocated, yet `iommu_map_sgtable()` will use `rkt_obj->mm.start` (which is uninitialized/zero) and the `err_remove_node` path will try to remove a node that was never inserted. This is a pre-existing bug, but since this patch is reworking the function, it should add the missing check:
```c
if (ret)
    goto err;
```

2. **Bug (pre-existing, carried forward): Missing `rocket_vm_put` in the `err` path of `rocket_ioctl_create_bo`** — At line 78-79, a `rocket_vm_get()` reference is taken and stored in `rkt_obj->vm`:
```c
vm = rocket_vm_get(rocket_priv);
rkt_obj->vm = vm;
```
If any subsequent step fails and we jump to `err:`, the code calls `drm_gem_shmem_object_free(gem_obj)` — but that calls `rocket_gem_bo_free()` only if the GEM object's free callback is wired up, and the BO destructor path already calls `rocket_vm_put()`. However, looking more carefully: after `drm_gem_handle_create()` + `drm_gem_object_put()` on line 83-84, the driver drops its reference. If `drm_gem_handle_create()` succeeded but a later step fails, the GEM handle still exists, and jumping to `err:` calls `drm_gem_shmem_object_free()` without first closing the handle — this leaks the GEM handle. This is also a pre-existing bug but worth fixing in this series.

3. **Minor: `void *err` type in `rocket_vm_create`** — The error return variable is declared as `void *err` rather than the more typical `struct rocket_vm *err` or just using `ERR_PTR` inline. This is carried forward from the original code but is slightly unusual — no functional impact.

4. **Nit: `vm->domain = NULL` before `kfree(vm)`** — In `rocket_vm_destroy()`, setting `vm->domain = NULL` just before `kfree(vm)` is pointless since the memory is about to be freed. This was also present in the original code.

**Summary**: The core fix is sound and addresses the reported UAF correctly. The `drm_mm_insert_node_generic` missing error check should be fixed — either in this patch or as a clearly-marked follow-up — since it can cause further UAFs and undefined behavior on the new `vm->mm`. The other issues are lower priority.

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-04-28  5:18 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
2026-04-28  5:18 ` Claude review: " Claude Code Review Bot
2026-04-28  5:18 ` Claude Code Review Bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox