* [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job
@ 2026-04-26 10:37 gyeyoung baek
2026-04-28 5:18 ` Claude review: " Claude Code Review Bot
2026-04-28 5:18 ` Claude Code Review Bot
0 siblings, 2 replies; 3+ messages in thread
From: gyeyoung baek @ 2026-04-26 10:37 UTC (permalink / raw)
To: Tomeu Vizoso, Oded Gabbay, dri-devel, linux-kernel; +Cc: gyeyoung baek
The drm_mm and its mutex live in rocket_file_priv, which
rocket_postclose() frees on fd close. But a BO held by an
in-flight job needs to access them later -- when its destructor
runs from the drm_sched free_job worker after the NPU IRQ --
and that access hits freed memory.
BUG: KASAN: slab-use-after-free in __mutex_trylock_common+0x90/0x1e8
Workqueue: <NPU>.npu drm_sched_free_job_work [gpu_sched]
Call trace:
__mutex_lock
rocket_gem_bo_free
rocket_job_cleanup
rocket_job_free
drm_sched_free_job_work [gpu_sched]
Move drm_mm and the mutex out of rocket_file_priv into the
kref-managed rocket_iommu_domain (renamed to rocket_vm).
Their lifetime now follows the vm: while any job references
the vm, the address-space state stays alive.
Fixes: ed98261b4168 ("accel/rocket: Add a new driver for Rockchip's NPU")
Signed-off-by: gyeyoung baek <gye976@gmail.com>
---
drivers/accel/rocket/rocket_drv.c | 74 +++++++++++++++----------------
drivers/accel/rocket/rocket_drv.h | 13 +++---
drivers/accel/rocket/rocket_gem.c | 29 ++++++------
drivers/accel/rocket/rocket_gem.h | 4 +-
drivers/accel/rocket/rocket_job.c | 6 +--
drivers/accel/rocket/rocket_job.h | 2 +-
6 files changed, 63 insertions(+), 65 deletions(-)
diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
index 8bbbce594..bddcfc0ff 100644
--- a/drivers/accel/rocket/rocket_drv.c
+++ b/drivers/accel/rocket/rocket_drv.c
@@ -26,46 +26,54 @@ static struct platform_device *drm_dev;
static struct rocket_device *rdev;
static void
-rocket_iommu_domain_destroy(struct kref *kref)
+rocket_vm_destroy(struct kref *kref)
{
- struct rocket_iommu_domain *domain = container_of(kref, struct rocket_iommu_domain, kref);
+ struct rocket_vm *vm = container_of(kref, struct rocket_vm, kref);
- iommu_domain_free(domain->domain);
- domain->domain = NULL;
- kfree(domain);
+ drm_mm_takedown(&vm->mm);
+ mutex_destroy(&vm->lock);
+ iommu_domain_free(vm->domain);
+ vm->domain = NULL;
+ kfree(vm);
}
-static struct rocket_iommu_domain*
-rocket_iommu_domain_create(struct device *dev)
+static struct rocket_vm *
+rocket_vm_create(struct device *dev)
{
- struct rocket_iommu_domain *domain = kmalloc_obj(*domain);
+ struct rocket_vm *vm = kmalloc_obj(*vm);
+ u64 start, end;
void *err;
- if (!domain)
+ if (!vm)
return ERR_PTR(-ENOMEM);
- domain->domain = iommu_paging_domain_alloc(dev);
- if (IS_ERR(domain->domain)) {
- err = ERR_CAST(domain->domain);
- kfree(domain);
+ vm->domain = iommu_paging_domain_alloc(dev);
+ if (IS_ERR(vm->domain)) {
+ err = ERR_CAST(vm->domain);
+ kfree(vm);
return err;
}
- kref_init(&domain->kref);
- return domain;
+ start = vm->domain->geometry.aperture_start;
+ end = vm->domain->geometry.aperture_end;
+ drm_mm_init(&vm->mm, start, end - start + 1);
+ mutex_init(&vm->lock);
+ kref_init(&vm->kref);
+
+ return vm;
}
-struct rocket_iommu_domain *
-rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv)
+struct rocket_vm *
+rocket_vm_get(struct rocket_file_priv *rocket_priv)
{
- kref_get(&rocket_priv->domain->kref);
- return rocket_priv->domain;
+ kref_get(&rocket_priv->vm->kref);
+ return rocket_priv->vm;
}
void
-rocket_iommu_domain_put(struct rocket_iommu_domain *domain)
+rocket_vm_put(struct rocket_vm *vm)
{
- kref_put(&domain->kref, rocket_iommu_domain_destroy);
+ kref_put(&vm->kref, rocket_vm_destroy);
}
static int
@@ -73,7 +81,6 @@ rocket_open(struct drm_device *dev, struct drm_file *file)
{
struct rocket_device *rdev = to_rocket_device(dev);
struct rocket_file_priv *rocket_priv;
- u64 start, end;
int ret;
if (!try_module_get(THIS_MODULE))
@@ -86,29 +93,22 @@ rocket_open(struct drm_device *dev, struct drm_file *file)
}
rocket_priv->rdev = rdev;
- rocket_priv->domain = rocket_iommu_domain_create(rdev->cores[0].dev);
- if (IS_ERR(rocket_priv->domain)) {
- ret = PTR_ERR(rocket_priv->domain);
+ rocket_priv->vm = rocket_vm_create(rdev->cores[0].dev);
+ if (IS_ERR(rocket_priv->vm)) {
+ ret = PTR_ERR(rocket_priv->vm);
goto err_free;
}
file->driver_priv = rocket_priv;
- start = rocket_priv->domain->domain->geometry.aperture_start;
- end = rocket_priv->domain->domain->geometry.aperture_end;
- drm_mm_init(&rocket_priv->mm, start, end - start + 1);
- mutex_init(&rocket_priv->mm_lock);
-
ret = rocket_job_open(rocket_priv);
if (ret)
- goto err_mm_takedown;
+ goto err_vm_put;
return 0;
-err_mm_takedown:
- mutex_destroy(&rocket_priv->mm_lock);
- drm_mm_takedown(&rocket_priv->mm);
- rocket_iommu_domain_put(rocket_priv->domain);
+err_vm_put:
+ rocket_vm_put(rocket_priv->vm);
err_free:
kfree(rocket_priv);
err_put_mod:
@@ -122,9 +122,7 @@ rocket_postclose(struct drm_device *dev, struct drm_file *file)
struct rocket_file_priv *rocket_priv = file->driver_priv;
rocket_job_close(rocket_priv);
- mutex_destroy(&rocket_priv->mm_lock);
- drm_mm_takedown(&rocket_priv->mm);
- rocket_iommu_domain_put(rocket_priv->domain);
+ rocket_vm_put(rocket_priv->vm);
kfree(rocket_priv);
module_put(THIS_MODULE);
}
diff --git a/drivers/accel/rocket/rocket_drv.h b/drivers/accel/rocket/rocket_drv.h
index 2c673bb99..2754f46f1 100644
--- a/drivers/accel/rocket/rocket_drv.h
+++ b/drivers/accel/rocket/rocket_drv.h
@@ -11,22 +11,23 @@
extern const struct dev_pm_ops rocket_pm_ops;
-struct rocket_iommu_domain {
+struct rocket_vm {
struct iommu_domain *domain;
+ struct drm_mm mm;
+ /* protects @mm */
+ struct mutex lock;
struct kref kref;
};
struct rocket_file_priv {
struct rocket_device *rdev;
- struct rocket_iommu_domain *domain;
- struct drm_mm mm;
- struct mutex mm_lock;
+ struct rocket_vm *vm;
struct drm_sched_entity sched_entity;
};
-struct rocket_iommu_domain *rocket_iommu_domain_get(struct rocket_file_priv *rocket_priv);
-void rocket_iommu_domain_put(struct rocket_iommu_domain *domain);
+struct rocket_vm *rocket_vm_get(struct rocket_file_priv *rocket_priv);
+void rocket_vm_put(struct rocket_vm *vm);
#endif
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
index b6a385d2e..7f64134fd 100644
--- a/drivers/accel/rocket/rocket_gem.c
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -14,20 +14,20 @@
static void rocket_gem_bo_free(struct drm_gem_object *obj)
{
struct rocket_gem_object *bo = to_rocket_bo(obj);
- struct rocket_file_priv *rocket_priv = bo->driver_priv;
+ struct rocket_vm *vm = bo->vm;
size_t unmapped;
drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
- unmapped = iommu_unmap(bo->domain->domain, bo->mm.start, bo->size);
+ unmapped = iommu_unmap(vm->domain, bo->mm.start, bo->size);
drm_WARN_ON(obj->dev, unmapped != bo->size);
- mutex_lock(&rocket_priv->mm_lock);
+ mutex_lock(&vm->lock);
drm_mm_remove_node(&bo->mm);
- mutex_unlock(&rocket_priv->mm_lock);
+ mutex_unlock(&vm->lock);
- rocket_iommu_domain_put(bo->domain);
- bo->domain = NULL;
+ rocket_vm_put(vm);
+ bo->vm = NULL;
drm_gem_shmem_free(&bo->base);
}
@@ -64,6 +64,7 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
struct drm_gem_shmem_object *shmem_obj;
struct rocket_gem_object *rkt_obj;
struct drm_gem_object *gem_obj;
+ struct rocket_vm *vm;
struct sg_table *sgt;
int ret;
@@ -74,8 +75,8 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
gem_obj = &shmem_obj->base;
rkt_obj = to_rocket_bo(gem_obj);
- rkt_obj->driver_priv = rocket_priv;
- rkt_obj->domain = rocket_iommu_domain_get(rocket_priv);
+ vm = rocket_vm_get(rocket_priv);
+ rkt_obj->vm = vm;
rkt_obj->size = args->size;
rkt_obj->offset = 0;
@@ -90,13 +91,13 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
goto err;
}
- mutex_lock(&rocket_priv->mm_lock);
- ret = drm_mm_insert_node_generic(&rocket_priv->mm, &rkt_obj->mm,
+ mutex_lock(&vm->lock);
+ ret = drm_mm_insert_node_generic(&vm->mm, &rkt_obj->mm,
rkt_obj->size, PAGE_SIZE,
0, 0);
- mutex_unlock(&rocket_priv->mm_lock);
+ mutex_unlock(&vm->lock);
- ret = iommu_map_sgtable(rocket_priv->domain->domain,
+ ret = iommu_map_sgtable(vm->domain,
rkt_obj->mm.start,
shmem_obj->sgt,
IOMMU_READ | IOMMU_WRITE);
@@ -115,9 +116,9 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
return 0;
err_remove_node:
- mutex_lock(&rocket_priv->mm_lock);
+ mutex_lock(&vm->lock);
drm_mm_remove_node(&rkt_obj->mm);
- mutex_unlock(&rocket_priv->mm_lock);
+ mutex_unlock(&vm->lock);
err:
drm_gem_shmem_object_free(gem_obj);
diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h
index 240430334..e1fbbd8cf 100644
--- a/drivers/accel/rocket/rocket_gem.h
+++ b/drivers/accel/rocket/rocket_gem.h
@@ -9,9 +9,7 @@
struct rocket_gem_object {
struct drm_gem_shmem_object base;
- struct rocket_file_priv *driver_priv;
-
- struct rocket_iommu_domain *domain;
+ struct rocket_vm *vm;
struct drm_mm_node mm;
size_t size;
u32 offset;
diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c
index 2f1861f96..7695fca02 100644
--- a/drivers/accel/rocket/rocket_job.c
+++ b/drivers/accel/rocket/rocket_job.c
@@ -233,7 +233,7 @@ static void rocket_job_cleanup(struct kref *ref)
refcount);
unsigned int i;
- rocket_iommu_domain_put(job->domain);
+ rocket_vm_put(job->vm);
dma_fence_put(job->done_fence);
dma_fence_put(job->inference_done_fence);
@@ -314,7 +314,7 @@ static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
if (ret < 0)
return fence;
- ret = iommu_attach_group(job->domain->domain, core->iommu_group);
+ ret = iommu_attach_group(job->vm->domain, core->iommu_group);
if (ret < 0)
return fence;
@@ -573,7 +573,7 @@ static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file
rjob->out_bo_count = job->out_bo_handle_count;
- rjob->domain = rocket_iommu_domain_get(file_priv);
+ rjob->vm = rocket_vm_get(file_priv);
ret = rocket_job_push(rjob);
if (ret)
diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h
index 4ae00feec..9373c3d02 100644
--- a/drivers/accel/rocket/rocket_job.h
+++ b/drivers/accel/rocket/rocket_job.h
@@ -36,7 +36,7 @@ struct rocket_job {
/* Fence to be signaled by IRQ handler when the job is complete. */
struct dma_fence *done_fence;
- struct rocket_iommu_domain *domain;
+ struct rocket_vm *vm;
struct kref refcount;
};
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* Claude review: accel/rocket: Fix drm_mm UAF on close vs in-flight job
2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
@ 2026-04-28 5:18 ` Claude Code Review Bot
2026-04-28 5:18 ` Claude Code Review Bot
1 sibling, 0 replies; 3+ messages in thread
From: Claude Code Review Bot @ 2026-04-28 5:18 UTC (permalink / raw)
To: dri-devel-reviews
Patch Review
**Positive aspects:**
1. The commit message is excellent — it includes the KASAN splat, clearly explains the root cause and the fix approach, and has a proper `Fixes:` tag.
2. The lifetime model is now correct: `rocket_vm` is kref-managed, and both `rocket_gem_object` (via `bo->vm`) and `rocket_job` (via `job->vm`) hold references, so the `drm_mm` stays alive as long as any BO or job needs it.
3. The `rocket_vm_destroy()` correctly tears down in the right order — `drm_mm_takedown`, `mutex_destroy`, then `iommu_domain_free`:
```c
drm_mm_takedown(&vm->mm);
mutex_destroy(&vm->lock);
iommu_domain_free(vm->domain);
vm->domain = NULL;
kfree(vm);
```
4. Initializing `drm_mm` inside `rocket_vm_create()` from the IOMMU geometry aperture is cleaner than the previous approach in `rocket_open()`:
```c
start = vm->domain->geometry.aperture_start;
end = vm->domain->geometry.aperture_end;
drm_mm_init(&vm->mm, start, end - start + 1);
```
**Issues:**
1. **Bug (pre-existing, carried forward): Missing error check on `drm_mm_insert_node_generic`** — In `rocket_gem.c:94-98`, the return value from `drm_mm_insert_node_generic()` is assigned to `ret`, but it is immediately overwritten by the `iommu_map_sgtable()` call on line 100 without checking for failure:
```c
mutex_lock(&vm->lock);
ret = drm_mm_insert_node_generic(&vm->mm, &rkt_obj->mm,
rkt_obj->size, PAGE_SIZE,
0, 0);
mutex_unlock(&vm->lock);
ret = iommu_map_sgtable(vm->domain,
rkt_obj->mm.start,
shmem_obj->sgt,
IOMMU_READ | IOMMU_WRITE);
```
If `drm_mm_insert_node_generic()` fails, `rkt_obj->mm` is not allocated, yet `iommu_map_sgtable()` will use `rkt_obj->mm.start` (which is uninitialized/zero) and the `err_remove_node` path will try to remove a node that was never inserted. This is a pre-existing bug, but since this patch is reworking the function, it should add the missing check:
```c
if (ret)
goto err;
```
2. **Bug (pre-existing, carried forward): Missing `rocket_vm_put` in the `err` path of `rocket_ioctl_create_bo`** — At line 78-79, a `rocket_vm_get()` reference is taken and stored in `rkt_obj->vm`:
```c
vm = rocket_vm_get(rocket_priv);
rkt_obj->vm = vm;
```
If any subsequent step fails and we jump to `err:`, the code calls `drm_gem_shmem_object_free(gem_obj)` — but that calls `rocket_gem_bo_free()` only if the GEM object's free callback is wired up, and the BO destructor path already calls `rocket_vm_put()`. However, looking more carefully: after `drm_gem_handle_create()` + `drm_gem_object_put()` on line 83-84, the driver drops its reference. If `drm_gem_handle_create()` succeeded but a later step fails, the GEM handle still exists, and jumping to `err:` calls `drm_gem_shmem_object_free()` without first closing the handle — this leaks the GEM handle. This is also a pre-existing bug but worth fixing in this series.
3. **Minor: `void *err` type in `rocket_vm_create`** — The error return variable is declared as `void *err` rather than the more typical `struct rocket_vm *err` or just using `ERR_PTR` inline. This is carried forward from the original code but is slightly unusual — no functional impact.
4. **Nit: `vm->domain = NULL` before `kfree(vm)`** — In `rocket_vm_destroy()`, setting `vm->domain = NULL` just before `kfree(vm)` is pointless since the memory is about to be freed. This was also present in the original code.
**Summary**: The core fix is sound and addresses the reported UAF correctly. The `drm_mm_insert_node_generic` missing error check should be fixed — either in this patch or as a clearly-marked follow-up — since it can cause further UAFs and undefined behavior on the new `vm->mm`. The other issues are lower priority.
---
Generated by Claude Code Patch Reviewer
^ permalink raw reply [flat|nested] 3+ messages in thread* Claude review: accel/rocket: Fix drm_mm UAF on close vs in-flight job
2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
2026-04-28 5:18 ` Claude review: " Claude Code Review Bot
@ 2026-04-28 5:18 ` Claude Code Review Bot
1 sibling, 0 replies; 3+ messages in thread
From: Claude Code Review Bot @ 2026-04-28 5:18 UTC (permalink / raw)
To: dri-devel-reviews
Overall Series Review
Subject: accel/rocket: Fix drm_mm UAF on close vs in-flight job
Author: gyeyoung baek <gye976@gmail.com>
Patches: 1
Reviewed: 2026-04-28T15:18:19.348685
---
This is a single-patch fix for a real use-after-free bug in the `accel/rocket` NPU driver. The core insight is correct: `drm_mm` and its mutex were embedded in `rocket_file_priv`, which is freed in `rocket_postclose()` on fd close — but BO destructors running from the scheduler's `free_job` worker still need to call `drm_mm_remove_node()` against that same allocator, causing a UAF. Moving these into the kref-managed `rocket_vm` struct (which jobs and BOs hold references to) is the right fix.
The rename from `rocket_iommu_domain` to `rocket_vm` is clean and improves clarity. The mechanical changes are consistent across all six files.
However, there are two pre-existing bugs that this patch carries forward without fixing, and one of them interacts with the new code. These should be addressed either here or in follow-up patches.
**Verdict**: Correct fix for the stated UAF, but needs at least the `drm_mm_insert_node_generic` error-handling issue fixed before merging.
---
---
Generated by Claude Code Patch Reviewer
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-28 5:18 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-26 10:37 [PATCH] accel/rocket: Fix drm_mm UAF on close vs in-flight job gyeyoung baek
2026-04-28 5:18 ` Claude review: " Claude Code Review Bot
2026-04-28 5:18 ` Claude Code Review Bot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox