public inbox for drm-ai-reviews@public-inbox.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
@ 2026-03-24 16:31 Lizhi Hou
  2026-03-24 17:01 ` Mario Limonciello
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Lizhi Hou @ 2026-03-24 16:31 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou

From: Max Zhen <max.zhen@amd.com>

Add support for querying per-process buffer object (BO) memory
usage through the amdxdna GET_ARRAY UAPI.

Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
struct amdxdna_drm_bo_usage to report BO memory usage statistics,
including heap, total, and internal usage.

Track BO memory usage on a per-client basis by maintaining counters
in GEM open/close and heap allocation/free paths. This ensures the
reported statistics reflect the current memory footprint of each
process.

Wire the new query into the GET_ARRAY implementation to expose
the usage information to userspace.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c        |   4 +
 drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
 drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
 include/uapi/drm/amdxdna_accel.h        |  35 +++++++
 6 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 9e39bfe75971..f1ac4e00bd9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
 	tmp->command_submissions = hwctx->priv->seq;
 	tmp->command_completions = hwctx->priv->completed;
 	tmp->pasid = hwctx->client->pasid;
+	tmp->heap_usage = hwctx->client->heap_usage;
 	tmp->priority = hwctx->qos.priority;
 	tmp->gops = hwctx->qos.gops;
 	tmp->fps = hwctx->qos.fps;
@@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
 	case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
 		ret = aie2_get_array_async_error(xdna->dev_handle, args);
 		break;
+	case DRM_AMDXDNA_BO_USAGE:
+		ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
+		break;
 	default:
 		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
 		ret = -EOPNOTSUPP;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 27712704e42d..238ee244d4a6 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 		goto unlock_out;
 	}
 
+	client->heap_usage += mem->size;
+
 	drm_gem_object_get(to_gobj(heap));
 
 unlock_out:
@@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 static void
 amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
 {
+	struct amdxdna_client *client = abo->client;
 	struct amdxdna_gem_obj *heap;
 
-	mutex_lock(&abo->client->mm_lock);
+	mutex_lock(&client->mm_lock);
 
 	drm_mm_remove_node(&abo->mm_node);
-
-	heap = abo->client->dev_heap;
+	client->heap_usage -= abo->mem.size;
+	heap = client->dev_heap;
 	drm_gem_object_put(to_gobj(heap));
 
-	mutex_unlock(&abo->client->mm_lock);
+	mutex_unlock(&client->mm_lock);
 }
 
 static struct amdxdna_gem_obj *
@@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
 	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
 	abo->mem.uva = AMDXDNA_INVALID_ADDR;
 	abo->mem.size = size;
+	abo->open_ref = 0;
+	abo->internal = false;
 	INIT_LIST_HEAD(&abo->mem.umap_list);
 
 	return abo;
@@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 	kfree(abo);
 }
 
+static inline bool
+amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	/* Do not count imported BOs since the buffer is not allocated by us. */
+	if (is_import_bo(abo))
+		return true;
+
+	/* Already counted as part of HEAP BO */
+	if (abo->type == AMDXDNA_BO_DEV)
+		return true;
+
+	return false;
+}
+
+static void
+amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_client *client = abo->client;
+
+	if (amdxdna_gem_skip_bo_usage(abo))
+		return;
+
+	guard(mutex)(&client->mm_lock);
+
+	client->total_bo_usage += abo->mem.size;
+	if (abo->internal)
+		client->total_int_bo_usage += abo->mem.size;
+}
+
+static void
+amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_client *client = abo->client;
+
+	if (amdxdna_gem_skip_bo_usage(abo))
+		return;
+
+	guard(mutex)(&client->mm_lock);
+
+	client->total_bo_usage -= abo->mem.size;
+	if (abo->internal)
+		client->total_int_bo_usage -= abo->mem.size;
+}
+
 static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
 	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
 
-	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
-
 	amdxdna_hmm_unregister(abo, NULL);
 	flush_workqueue(xdna->notifier_wq);
 
@@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 	int ret;
 
 	guard(mutex)(&abo->lock);
+	abo->open_ref++;
 
-	if (!abo->client)
+	if (abo->open_ref == 1) {
+		/* Attached to the client when first opened by it. */
 		abo->client = filp->driver_priv;
+		amdxdna_gem_add_bo_usage(abo);
+	}
 	if (amdxdna_iova_on(xdna)) {
 		ret = amdxdna_iommu_map_bo(xdna, abo);
 		if (ret)
@@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 	return 0;
 }
 
+static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+	guard(mutex)(&abo->lock);
+	abo->open_ref--;
+
+	if (abo->open_ref == 0) {
+		amdxdna_gem_del_bo_usage(abo);
+		/* Detach from the client when last closed by it. */
+		abo->client = NULL;
+	}
+}
+
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
 	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
 static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.free = amdxdna_gem_obj_free,
 	.open = amdxdna_gem_obj_open,
+	.close = amdxdna_gem_obj_close,
 	.print_info = drm_gem_shmem_object_print_info,
 	.pin = drm_gem_shmem_object_pin,
 	.unpin = drm_gem_shmem_object_unpin,
@@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
 	if (IS_ERR(abo))
 		return ERR_CAST(abo);
 
-	if (args->type == AMDXDNA_BO_DEV_HEAP)
+	if (args->type == AMDXDNA_BO_DEV_HEAP) {
 		abo->type = AMDXDNA_BO_DEV_HEAP;
-	else
+		abo->internal = true;
+	} else {
 		abo->type = AMDXDNA_BO_SHARE;
+		abo->internal = args->type == AMDXDNA_BO_CMD;
+	}
 
 	return abo;
 }
@@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
 	gobj = to_gobj(abo);
 	gobj->funcs = &amdxdna_gem_dev_obj_funcs;
 	abo->type = AMDXDNA_BO_DEV;
+	abo->internal = true;
+	/*
+	 * DEV BOs cannot be alive when client is gone, it's OK to
+	 * always establish the connection.
+	 */
 	abo->client = client;
 
 	ret = amdxdna_gem_heap_alloc(abo);
@@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 	if (IS_ERR(abo))
 		return PTR_ERR(abo);
 
-	/* ready to publish object to userspace */
+	/* Ready to publish object to userspace and count for BO usage. */
 	ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
 	if (ret) {
 		XDNA_ERR(xdna, "Create handle failed");
@@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
 	drm_gem_object_put(gobj);
 	return ret;
 }
+
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
+{
+	size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
+	char __user *buf = u64_to_user_ptr(args->buffer);
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_client *tmp_client;
+	struct amdxdna_drm_bo_usage tmp;
+
+	drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (args->num_element != 1)
+		return -EINVAL;
+
+	if (copy_from_user(&tmp, buf, min_sz))
+		return -EFAULT;
+
+	if (!tmp.pid)
+		return -EINVAL;
+
+	tmp.total_usage = 0;
+	tmp.internal_usage = 0;
+	tmp.heap_usage = 0;
+
+	list_for_each_entry(tmp_client, &xdna->client_list, node) {
+		if (tmp.pid != tmp_client->pid)
+			continue;
+
+		mutex_lock(&tmp_client->mm_lock);
+		tmp.total_usage += tmp_client->total_bo_usage;
+		tmp.internal_usage += tmp_client->total_int_bo_usage;
+		tmp.heap_usage += tmp_client->heap_usage;
+		mutex_unlock(&tmp_client->mm_lock);
+	}
+
+	if (copy_to_user(buf, &tmp, min_sz))
+		return -EFAULT;
+
+	return 0;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index a77d9344f8a4..4fc48a1189d2 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
 	struct amdxdna_client		*client;
 	u8				type;
 	bool				pinned;
-	struct mutex			lock; /* Protects: pinned, mem.kva */
+	struct mutex			lock; /* Protects: pinned, mem.kva, open_ref */
 	struct amdxdna_mem		mem;
+	int				open_ref;
 
 	/* Below members are initialized when needed */
 	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
@@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
 	u32				assigned_hwctx;
 	struct dma_buf			*dma_buf;
 	struct dma_buf_attachment	*attach;
+
+	/* True, if BO is managed by XRT, not application */
+	bool				internal;
 };
 
 #define to_gobj(obj)    (&(obj)->base.base)
@@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
 int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
 
 #endif /* _AMDXDNA_GEM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index d83be00daf2b..b50a7d1f8a11 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
  * 0.5: Support getting telemetry data
  * 0.6: Support preemption
  * 0.7: Support getting power and utilization data
+ * 0.8: Support BO usage query
  */
 #define AMDXDNA_DRIVER_MAJOR		0
-#define AMDXDNA_DRIVER_MINOR		7
+#define AMDXDNA_DRIVER_MINOR		8
 
 /*
  * Bind the driver base on (vendor_id, device_id) pair and later use the
@@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 	amdxdna_hwctx_remove_all(client);
 	xa_destroy(&client->hwctx_xa);
 	cleanup_srcu_struct(&client->hwctx_srcu);
-	mutex_destroy(&client->mm_lock);
 
 	if (client->dev_heap)
 		drm_gem_object_put(to_gobj(client->dev_heap));
 
+	mutex_destroy(&client->mm_lock);
+
 	if (!IS_ERR_OR_NULL(client->sva))
 		iommu_sva_unbind_device(client->sva);
 	mmdrop(client->mm);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index e91d14ae5190..0661749917d6 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -138,6 +138,10 @@ struct amdxdna_client {
 	struct iommu_sva		*sva;
 	int				pasid;
 	struct mm_struct		*mm;
+
+	size_t				heap_usage;
+	size_t				total_bo_usage;
+	size_t				total_int_bo_usage;
 };
 
 #define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index bddaaaf945cf..61d3686fa3b1 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -591,8 +591,37 @@ struct amdxdna_async_error {
 	__u64 ex_err_code;
 };
 
+/**
+ * struct amdxdna_drm_bo_usage - all types of BO usage
+ * BOs managed by XRT/SHIM/driver is counted as internal.
+ * Others are counted as external which are managed by applications.
+ *
+ * Among all types of BOs:
+ *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
+ *   AMDXDNA_BO_SHARE    - is counted for external.
+ *   AMDXDNA_BO_CMD      - is counted for internal.
+ *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
+ *                         or external. It does not add to the total memory
+ *                         footprint since its mem comes from heap which is
+ *                         already counted as internal.
+ */
+struct amdxdna_drm_bo_usage {
+	/** @pid: The ID of the process to query from. */
+	__s64 pid;
+	/** @total_usage: Total BO size used by process. */
+	__u64 total_usage;
+	/** @internal_usage: Total internal BO size used by process. */
+	__u64 internal_usage;
+	/** @heap_usage: Total device BO size used by process. */
+	__u64 heap_usage;
+};
+
+/*
+ * Supported params in struct amdxdna_drm_get_array
+ */
 #define DRM_AMDXDNA_HW_CONTEXT_ALL	0
 #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR	2
+#define DRM_AMDXDNA_BO_USAGE		6
 
 /**
  * struct amdxdna_drm_get_array - Get information array.
@@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
 	 *
 	 * %DRM_AMDXDNA_HW_CONTEXT_ALL:
 	 * Returns all created hardware contexts.
+	 *
+	 * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+	 * Returns last async error.
+	 *
+	 * %DRM_AMDXDNA_BO_USAGE:
+	 * Returns usage of heap/internal/external BOs.
 	 */
 	__u32 param;
 	/**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
  2026-03-24 16:31 [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support Lizhi Hou
@ 2026-03-24 17:01 ` Mario Limonciello
  2026-03-24 20:39 ` Claude review: " Claude Code Review Bot
  2026-03-24 20:39 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Mario Limonciello @ 2026-03-24 17:01 UTC (permalink / raw)
  To: Lizhi Hou, ogabbay, quic_jhugo, dri-devel, maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan



On 3/24/26 11:31, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
> 
> Add support for querying per-process buffer object (BO) memory
> usage through the amdxdna GET_ARRAY UAPI.
> 
> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> including heap, total, and internal usage.
> 
> Track BO memory usage on a per-client basis by maintaining counters
> in GEM open/close and heap allocation/free paths. This ensures the
> reported statistics reflect the current memory footprint of each
> process.
> 
> Wire the new query into the GET_ARRAY implementation to expose
> the usage information to userspace.
> 
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
I'm assuming you also have the userspace side ready for this too, right?
If you have a link handy, can you please include it when committing?

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>

> ---
>   drivers/accel/amdxdna/aie2_pci.c        |   4 +
>   drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
>   drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
>   include/uapi/drm/amdxdna_accel.h        |  35 +++++++
>   6 files changed, 177 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 9e39bfe75971..f1ac4e00bd9f 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
>   	tmp->command_submissions = hwctx->priv->seq;
>   	tmp->command_completions = hwctx->priv->completed;
>   	tmp->pasid = hwctx->client->pasid;
> +	tmp->heap_usage = hwctx->client->heap_usage;
>   	tmp->priority = hwctx->qos.priority;
>   	tmp->gops = hwctx->qos.gops;
>   	tmp->fps = hwctx->qos.fps;
> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
>   	case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>   		ret = aie2_get_array_async_error(xdna->dev_handle, args);
>   		break;
> +	case DRM_AMDXDNA_BO_USAGE:
> +		ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
> +		break;
>   	default:
>   		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>   		ret = -EOPNOTSUPP;
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index 27712704e42d..238ee244d4a6 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>   		goto unlock_out;
>   	}
>   
> +	client->heap_usage += mem->size;
> +
>   	drm_gem_object_get(to_gobj(heap));
>   
>   unlock_out:
> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>   static void
>   amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>   {
> +	struct amdxdna_client *client = abo->client;
>   	struct amdxdna_gem_obj *heap;
>   
> -	mutex_lock(&abo->client->mm_lock);
> +	mutex_lock(&client->mm_lock);
>   
>   	drm_mm_remove_node(&abo->mm_node);
> -
> -	heap = abo->client->dev_heap;
> +	client->heap_usage -= abo->mem.size;
> +	heap = client->dev_heap;
>   	drm_gem_object_put(to_gobj(heap));
>   
> -	mutex_unlock(&abo->client->mm_lock);
> +	mutex_unlock(&client->mm_lock);
>   }
>   
>   static struct amdxdna_gem_obj *
> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
>   	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>   	abo->mem.uva = AMDXDNA_INVALID_ADDR;
>   	abo->mem.size = size;
> +	abo->open_ref = 0;
> +	abo->internal = false;
>   	INIT_LIST_HEAD(&abo->mem.umap_list);
>   
>   	return abo;
> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
>   	kfree(abo);
>   }
>   
> +static inline bool
> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	/* Do not count imported BOs since the buffer is not allocated by us. */
> +	if (is_import_bo(abo))
> +		return true;
> +
> +	/* Already counted as part of HEAP BO */
> +	if (abo->type == AMDXDNA_BO_DEV)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void
> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	struct amdxdna_client *client = abo->client;
> +
> +	if (amdxdna_gem_skip_bo_usage(abo))
> +		return;
> +
> +	guard(mutex)(&client->mm_lock);
> +
> +	client->total_bo_usage += abo->mem.size;
> +	if (abo->internal)
> +		client->total_int_bo_usage += abo->mem.size;
> +}
> +
> +static void
> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	struct amdxdna_client *client = abo->client;
> +
> +	if (amdxdna_gem_skip_bo_usage(abo))
> +		return;
> +
> +	guard(mutex)(&client->mm_lock);
> +
> +	client->total_bo_usage -= abo->mem.size;
> +	if (abo->internal)
> +		client->total_int_bo_usage -= abo->mem.size;
> +}
> +
>   static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>   {
>   	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>   	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>   
> -	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
> -
>   	amdxdna_hmm_unregister(abo, NULL);
>   	flush_workqueue(xdna->notifier_wq);
>   
> @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>   	int ret;
>   
>   	guard(mutex)(&abo->lock);
> +	abo->open_ref++;
>   
> -	if (!abo->client)
> +	if (abo->open_ref == 1) {
> +		/* Attached to the client when first opened by it. */
>   		abo->client = filp->driver_priv;
> +		amdxdna_gem_add_bo_usage(abo);
> +	}
>   	if (amdxdna_iova_on(xdna)) {
>   		ret = amdxdna_iommu_map_bo(xdna, abo);
>   		if (ret)
> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>   	return 0;
>   }
>   
> +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
> +
> +	guard(mutex)(&abo->lock);
> +	abo->open_ref--;
> +
> +	if (abo->open_ref == 0) {
> +		amdxdna_gem_del_bo_usage(abo);
> +		/* Detach from the client when last closed by it. */
> +		abo->client = NULL;
> +	}
> +}
> +
>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
>   {
>   	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
>   static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>   	.free = amdxdna_gem_obj_free,
>   	.open = amdxdna_gem_obj_open,
> +	.close = amdxdna_gem_obj_close,
>   	.print_info = drm_gem_shmem_object_print_info,
>   	.pin = drm_gem_shmem_object_pin,
>   	.unpin = drm_gem_shmem_object_unpin,
> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
>   	if (IS_ERR(abo))
>   		return ERR_CAST(abo);
>   
> -	if (args->type == AMDXDNA_BO_DEV_HEAP)
> +	if (args->type == AMDXDNA_BO_DEV_HEAP) {
>   		abo->type = AMDXDNA_BO_DEV_HEAP;
> -	else
> +		abo->internal = true;
> +	} else {
>   		abo->type = AMDXDNA_BO_SHARE;
> +		abo->internal = args->type == AMDXDNA_BO_CMD;
> +	}
>   
>   	return abo;
>   }
> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>   	gobj = to_gobj(abo);
>   	gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>   	abo->type = AMDXDNA_BO_DEV;
> +	abo->internal = true;
> +	/*
> +	 * DEV BOs cannot be alive when client is gone, it's OK to
> +	 * always establish the connection.
> +	 */
>   	abo->client = client;
>   
>   	ret = amdxdna_gem_heap_alloc(abo);
> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
>   	if (IS_ERR(abo))
>   		return PTR_ERR(abo);
>   
> -	/* ready to publish object to userspace */
> +	/* Ready to publish object to userspace and count for BO usage. */
>   	ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Create handle failed");
> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
>   	drm_gem_object_put(gobj);
>   	return ret;
>   }
> +
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
> +{
> +	size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
> +	char __user *buf = u64_to_user_ptr(args->buffer);
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	struct amdxdna_client *tmp_client;
> +	struct amdxdna_drm_bo_usage tmp;
> +
> +	drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
> +
> +	if (args->num_element != 1)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&tmp, buf, min_sz))
> +		return -EFAULT;
> +
> +	if (!tmp.pid)
> +		return -EINVAL;
> +
> +	tmp.total_usage = 0;
> +	tmp.internal_usage = 0;
> +	tmp.heap_usage = 0;
> +
> +	list_for_each_entry(tmp_client, &xdna->client_list, node) {
> +		if (tmp.pid != tmp_client->pid)
> +			continue;
> +
> +		mutex_lock(&tmp_client->mm_lock);
> +		tmp.total_usage += tmp_client->total_bo_usage;
> +		tmp.internal_usage += tmp_client->total_int_bo_usage;
> +		tmp.heap_usage += tmp_client->heap_usage;
> +		mutex_unlock(&tmp_client->mm_lock);
> +	}
> +
> +	if (copy_to_user(buf, &tmp, min_sz))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
> index a77d9344f8a4..4fc48a1189d2 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.h
> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>   	struct amdxdna_client		*client;
>   	u8				type;
>   	bool				pinned;
> -	struct mutex			lock; /* Protects: pinned, mem.kva */
> +	struct mutex			lock; /* Protects: pinned, mem.kva, open_ref */
>   	struct amdxdna_mem		mem;
> +	int				open_ref;
>   
>   	/* Below members are initialized when needed */
>   	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>   	u32				assigned_hwctx;
>   	struct dma_buf			*dma_buf;
>   	struct dma_buf_attachment	*attach;
> +
> +	/* True, if BO is managed by XRT, not application */
> +	bool				internal;
>   };
>   
>   #define to_gobj(obj)    (&(obj)->base.base)
> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>   int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
>   
>   #endif /* _AMDXDNA_GEM_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index d83be00daf2b..b50a7d1f8a11 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>    * 0.5: Support getting telemetry data
>    * 0.6: Support preemption
>    * 0.7: Support getting power and utilization data
> + * 0.8: Support BO usage query
>    */
>   #define AMDXDNA_DRIVER_MAJOR		0
> -#define AMDXDNA_DRIVER_MINOR		7
> +#define AMDXDNA_DRIVER_MINOR		8
>   
>   /*
>    * Bind the driver base on (vendor_id, device_id) pair and later use the
> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
>   	amdxdna_hwctx_remove_all(client);
>   	xa_destroy(&client->hwctx_xa);
>   	cleanup_srcu_struct(&client->hwctx_srcu);
> -	mutex_destroy(&client->mm_lock);
>   
>   	if (client->dev_heap)
>   		drm_gem_object_put(to_gobj(client->dev_heap));
>   
> +	mutex_destroy(&client->mm_lock);
> +
>   	if (!IS_ERR_OR_NULL(client->sva))
>   		iommu_sva_unbind_device(client->sva);
>   	mmdrop(client->mm);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index e91d14ae5190..0661749917d6 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -138,6 +138,10 @@ struct amdxdna_client {
>   	struct iommu_sva		*sva;
>   	int				pasid;
>   	struct mm_struct		*mm;
> +
> +	size_t				heap_usage;
> +	size_t				total_bo_usage;
> +	size_t				total_int_bo_usage;
>   };
>   
>   #define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index bddaaaf945cf..61d3686fa3b1 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>   	__u64 ex_err_code;
>   };
>   
> +/**
> + * struct amdxdna_drm_bo_usage - all types of BO usage
> + * BOs managed by XRT/SHIM/driver is counted as internal.
> + * Others are counted as external which are managed by applications.
> + *
> + * Among all types of BOs:
> + *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
> + *   AMDXDNA_BO_SHARE    - is counted for external.
> + *   AMDXDNA_BO_CMD      - is counted for internal.
> + *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
> + *                         or external. It does not add to the total memory
> + *                         footprint since its mem comes from heap which is
> + *                         already counted as internal.
> + */
> +struct amdxdna_drm_bo_usage {
> +	/** @pid: The ID of the process to query from. */
> +	__s64 pid;
> +	/** @total_usage: Total BO size used by process. */
> +	__u64 total_usage;
> +	/** @internal_usage: Total internal BO size used by process. */
> +	__u64 internal_usage;
> +	/** @heap_usage: Total device BO size used by process. */
> +	__u64 heap_usage;
> +};
> +
> +/*
> + * Supported params in struct amdxdna_drm_get_array
> + */
>   #define DRM_AMDXDNA_HW_CONTEXT_ALL	0
>   #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR	2
> +#define DRM_AMDXDNA_BO_USAGE		6
>   
>   /**
>    * struct amdxdna_drm_get_array - Get information array.
> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>   	 *
>   	 * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>   	 * Returns all created hardware contexts.
> +	 *
> +	 * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
> +	 * Returns last async error.
> +	 *
> +	 * %DRM_AMDXDNA_BO_USAGE:
> +	 * Returns usage of heap/internal/external BOs.
>   	 */
>   	__u32 param;
>   	/**


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Claude review: accel/amdxdna: Add per-process BO memory usage query support
  2026-03-24 16:31 [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support Lizhi Hou
  2026-03-24 17:01 ` Mario Limonciello
@ 2026-03-24 20:39 ` Claude Code Review Bot
  2026-03-24 20:39 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Claude Code Review Bot @ 2026-03-24 20:39 UTC (permalink / raw)
  To: dri-devel-reviews

Overall Series Review

Subject: accel/amdxdna: Add per-process BO memory usage query support
Author: Lizhi Hou <lizhi.hou@amd.com>
Patches: 2
Reviewed: 2026-03-25T06:39:41.810597

---

This is a single patch adding per-process BO memory usage query support to the amdxdna accelerator driver. The feature introduces a new `DRM_AMDXDNA_BO_USAGE` query type through the existing `GET_ARRAY` UAPI, tracks BO memory usage per-client via counters maintained in GEM open/close and heap alloc/free paths, and exposes the data to userspace.

The approach is reasonable overall, but there are several issues with the UAPI design, locking, and reference counting that need attention. The patch was also clearly developed against an older base — the current drm-next tree already has `ref` and `close` callbacks in the gem layer, so the patch renames `ref` to `open_ref` and introduces a `close` that already exists.

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Claude review: accel/amdxdna: Add per-process BO memory usage query support
  2026-03-24 16:31 [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support Lizhi Hou
  2026-03-24 17:01 ` Mario Limonciello
  2026-03-24 20:39 ` Claude review: " Claude Code Review Bot
@ 2026-03-24 20:39 ` Claude Code Review Bot
  2 siblings, 0 replies; 4+ messages in thread
From: Claude Code Review Bot @ 2026-03-24 20:39 UTC (permalink / raw)
  To: dri-devel-reviews

Patch Review

**1. UAPI: `pid` type is `__s64` — should be `__u32` or use proper pid_t semantics**

```c
struct amdxdna_drm_bo_usage {
	/** @pid: The ID of the process to query from. */
	__s64 pid;
```

Using `__s64` for a PID is unusual. The existing `amdxdna_drm_hwctx_entry` also uses `__s64` for pid, so there's precedent within this driver, but PIDs are typically positive integers that fit in 32 bits. More importantly, there's no validation that `pid` is a valid (positive) value — only that it's non-zero. A negative pid would pass the check and silently match nothing.

**2. Security concern: any process can query any other process's BO usage**

```c
	if (!tmp.pid)
		return -EINVAL;

	list_for_each_entry(tmp_client, &xdna->client_list, node) {
		if (tmp.pid != tmp_client->pid)
			continue;
```

The ioctl has no DRM_ROOT_ONLY flag (line 218 in the current tree shows `DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0)`), meaning any unprivileged process can query memory usage of any other process by PID. This leaks information about other users' memory usage. Consider restricting to the calling client's own PID unless the caller is privileged, or querying only the caller's own usage (removing the PID lookup entirely).

**3. `copy_from_user` with `min_sz` can read uninitialized stack data**

```c
	size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
	...
	if (copy_from_user(&tmp, buf, min_sz))
		return -EFAULT;
```

If `element_size < sizeof(struct amdxdna_drm_bo_usage)`, only a partial read occurs, but `tmp` is not zero-initialized. The `tmp.pid` check that follows may read uninitialized memory if `element_size` is smaller than `offsetof(amdxdna_drm_bo_usage, pid) + sizeof(pid)`. Use `= {}` or `memset` to zero-initialize `tmp` before the partial copy.

**4. Locking: `amdxdna_gem_add_bo_usage` / `amdxdna_gem_del_bo_usage` take `mm_lock`, but `open_ref` is protected by `abo->lock`**

```c
static int amdxdna_gem_obj_open(...)
{
	guard(mutex)(&abo->lock);
	abo->open_ref++;
	if (abo->open_ref == 1) {
		abo->client = filp->driver_priv;
		amdxdna_gem_add_bo_usage(abo);  // takes client->mm_lock inside
	}
```

The `abo->lock` is held while acquiring `client->mm_lock` inside `amdxdna_gem_add_bo_usage`. In `amdxdna_gem_heap_alloc`, `client->mm_lock` is held and `abo->client` is accessed. This creates a potential lock ordering dependency (abo->lock → mm_lock). Verify no code path takes these locks in the reverse order, or document the required ordering.

**5. Conflict with current tree: `open_ref` vs existing `ref`**

The current drm-next tree already has a `ref` field in `amdxdna_gem_obj` and already has both `open` and `close` callbacks. This patch renames `ref` to `open_ref` and introduces `close` as if it doesn't exist. The patch description and diffstat don't mention this as a rebase, which will cause conflicts. The patch needs to be rebased onto the current tree.

**6. `abo->client` is set/cleared based on `open_ref`, but `amdxdna_gem_obj_open` also does IOMMU mapping unconditionally**

```c
	if (abo->open_ref == 1) {
		abo->client = filp->driver_priv;
		amdxdna_gem_add_bo_usage(abo);
	}
	if (amdxdna_iova_on(xdna)) {
		ret = amdxdna_iommu_map_bo(xdna, abo);
		if (ret)
			return ret;
	}
```

The IOMMU mapping happens on every open (including `open_ref > 1`), but `client` is only assigned on the first open. If the IOMMU map fails on `open_ref > 1`, `open_ref` is already incremented but never decremented — the error path doesn't clean up. In the current tree this is handled correctly (early return when `ref > 0`), but the patch breaks this by removing that early return. The error path needs to decrement `open_ref` and potentially call `del_bo_usage` if it was the first open.

**7. DEV_HEAP is marked `internal = true` but is also counted via `heap_usage`**

```c
	if (args->type == AMDXDNA_BO_DEV_HEAP) {
		abo->type = AMDXDNA_BO_DEV_HEAP;
		abo->internal = true;
```

And in `amdxdna_gem_add_bo_usage`:
```c
	client->total_bo_usage += abo->mem.size;
	if (abo->internal)
		client->total_int_bo_usage += abo->mem.size;
```

So the DEV_HEAP BO itself is counted in both `total_bo_usage` and `total_int_bo_usage`, while DEV BO sub-allocations carved out of that heap are counted separately in `heap_usage`. Skipping DEV BOs from the totals is consistent with the UAPI doc's statement that their memory comes from the heap, but the UAPI comment remains confusing: clarify whether `total_usage` includes the heap BO itself, and whether `heap_usage` is a subset of `total_usage` or a disjoint counter.

**8. `mutex_destroy` reordering in cleanup**

```c
-	mutex_destroy(&client->mm_lock);
 	if (client->dev_heap)
 		drm_gem_object_put(to_gobj(client->dev_heap));
+	mutex_destroy(&client->mm_lock);
```

This reordering is correct — `drm_gem_object_put` on the dev_heap could trigger `amdxdna_gem_heap_free` which takes `mm_lock`, so the mutex must still be valid. Good fix but should be mentioned separately or in the commit message.

**9. `num_element` forced to 1 is not really an "array" query**

```c
	if (args->num_element != 1)
		return -EINVAL;
```

The `GET_ARRAY` UAPI is designed for returning arrays of elements. Forcing exactly 1 element is a bit of an API misuse. Consider whether this should instead be a `GET_INFO` query, which seems more appropriate for a single-value lookup.

**10. The `drm_WARN_ON` for lock assertion will fire in test scenarios**

```c
	drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
```

This is fine for documenting the expected locking context, but note it relies on the caller (aie2_get_array) always holding dev_lock, which is currently true via the ioctl path. However, the function signature takes `struct drm_device *` rather than `struct amdxdna_client *` (unlike other get_array handlers), which is inconsistent. The function is called from `aie2_get_array` which receives a `client`, and the dev_lock is held by the ioctl wrapper — passing `&xdna->ddev` directly couples the function to the aie2 backend's locking assumptions rather than the ioctl framework's guarantees.

---
Generated by Claude Code Patch Reviewer

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-03-24 20:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-24 16:31 [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support Lizhi Hou
2026-03-24 17:01 ` Mario Limonciello
2026-03-24 20:39 ` Claude review: " Claude Code Review Bot
2026-03-24 20:39 ` Claude Code Review Bot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox