public inbox for drm-ai-reviews@public-inbox.freedesktop.org
* [PATCH] dma-buf: heaps: Add Coherent heap to dmabuf heaps
@ 2026-02-24  7:57 Albert Esteve
  2026-02-26 10:12 ` Maxime Ripard
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Albert Esteve @ 2026-02-24  7:57 UTC (permalink / raw)
  To: Sumit Semwal, Benjamin Gaignard, Brian Starkey, John Stultz,
	T.J. Mercier, Christian König, Marek Szyprowski,
	Robin Murphy
  Cc: linux-kernel, linux-media, dri-devel, linaro-mm-sig, iommu,
	echanude, mripard, Albert Esteve

Add a dma-buf heap for DT coherent reserved-memory regions
(i.e., 'shared-dma-pool' without the 'reusable' property),
exposing one heap per region for userspace buffers.
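
For example, a qualifying node sits under /reserved-memory with
compatible = "shared-dma-pool" and no 'reusable' property; on some
architectures (e.g. 32-bit Arm) it must also carry 'no-map'.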

The heap binds a synthetic platform device to each region
so coherent allocations use the correct dev->dma_mem,
and defers registration until late_initcall, when normal
allocators are available.

The patch also charges coherent heap allocations to the
dmem cgroup.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
This patch introduces a new driver that exposes DT coherent
reserved-memory regions as dma-buf heaps, so userspace can allocate
buffers from them.
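
To sketch the userspace side: each region is exposed as
/dev/dma_heap/coherent_<region>, and allocation goes through the
standard dma-heap uapi. A minimal example (the region name
"mem@80000000" is a placeholder):

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/dma-heap.h>

  static int alloc_coherent_buf(unsigned long len)
  {
          struct dma_heap_allocation_data data = {
                  .len = len,
                  .fd_flags = O_RDWR | O_CLOEXEC,
          };
          int ret, heap;

          /* heap name is "coherent_" + the reserved-memory node name */
          heap = open("/dev/dma_heap/coherent_mem@80000000",
                      O_RDONLY | O_CLOEXEC);
          if (heap < 0)
                  return -1;
          ret = ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &data);
          close(heap);
          return ret < 0 ? -1 : (int)data.fd; /* mmap()-able dma-buf fd */
  }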

Since these regions are device-dependent, we bind a synthetic platform
device to each region so coherent allocations use the correct dev->dma_mem.

Following Eric's [1] and Maxime's [2] work on charging DMA buffers
allocated from userspace to cgroups (dmem), this patch adds the same
charging pattern used by the CMA heap patch. Charging is done only
through the dma-buf heap interface so it can be attributed to a
userspace allocator.

This allows each device-specific reserved-memory region to enforce its
own limits.
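
As an illustration, a per-region cap could then be set by writing an
entry such as "coh/mem@80000000 8388608" to a cgroup's dmem.max file
(the region name is a placeholder; it matches the "coh/%s" name the
driver registers).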

[1] https://lore.kernel.org/all/20260218-dmabuf-heap-cma-dmem-v2-0-b249886fb7b2@redhat.com/
[2] https://lore.kernel.org/all/20250310-dmem-cgroups-v1-0-2984c1bc9312@kernel.org/
---
 drivers/dma-buf/heaps/Kconfig         |  17 ++
 drivers/dma-buf/heaps/Makefile        |   1 +
 drivers/dma-buf/heaps/coherent_heap.c | 490 ++++++++++++++++++++++++++++++++++
 include/linux/dma-heap.h              |  11 +
 kernel/dma/coherent.c                 |   9 +
 5 files changed, 528 insertions(+)

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c42264..93765dca164e3 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -12,3 +12,20 @@ config DMABUF_HEAPS_CMA
 	  Choose this option to enable dma-buf CMA heap. This heap is backed
 	  by the Contiguous Memory Allocator (CMA). If your system has these
 	  regions, you should say Y here.
+
+config DMABUF_HEAPS_COHERENT
+	bool "DMA-BUF Coherent Reserved-Memory Heap"
+	depends on DMABUF_HEAPS && OF_RESERVED_MEM && DMA_DECLARE_COHERENT
+	help
+	  Choose this option to enable coherent reserved-memory dma-buf heaps.
+	  This heap is backed by non-reusable DT "shared-dma-pool" regions.
+	  If your system defines coherent reserved-memory regions, you should
+	  say Y here.
+
+config COHERENT_AREAS_DEFERRED
+	int "Max deferred coherent reserved-memory regions"
+	depends on DMABUF_HEAPS_COHERENT
+	default 16
+	help
+	  Maximum number of coherent reserved-memory regions that can be
+	  deferred for later registration during early boot.
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032f..96bda7a65f041 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)	+= system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA)		+= cma_heap.o
+obj-$(CONFIG_DMABUF_HEAPS_COHERENT)	+= coherent_heap.o
diff --git a/drivers/dma-buf/heaps/coherent_heap.c b/drivers/dma-buf/heaps/coherent_heap.c
new file mode 100644
index 0000000000000..870b2b89aefcb
--- /dev/null
+++ b/drivers/dma-buf/heaps/coherent_heap.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMABUF heap for coherent reserved-memory regions
+ *
+ * Copyright (C) 2026 Red Hat, Inc.
+ * Author: Albert Esteve <aesteve@redhat.com>
+ *
+ */
+
+#include <linux/cgroup_dmem.h>
+#include <linux/dma-heap.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/iosys-map.h>
+#include <linux/module.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#define DEFERRED_AREAS_MAX CONFIG_COHERENT_AREAS_DEFERRED
+
+/*
+ * Early init can't use normal memory management yet (memblock is used
+ * instead), so keep a small deferred list and retry at late_initcall.
+ */
+static struct reserved_mem *rmem_areas_deferred[DEFERRED_AREAS_MAX];
+static unsigned int rmem_areas_deferred_num;
+
+static int coherent_heap_add_deferred(struct reserved_mem *rmem)
+{
+	if (rmem_areas_deferred_num >= DEFERRED_AREAS_MAX) {
+		pr_warn("Deferred heap areas list full, dropping %s\n",
+			rmem->name ? rmem->name : "unknown");
+		return -EINVAL;
+	}
+	rmem_areas_deferred[rmem_areas_deferred_num++] = rmem;
+	return 0;
+}
+
+struct coherent_heap {
+	struct dma_heap *heap;
+	struct reserved_mem *rmem;
+	char *name;
+	struct device *dev;
+	struct platform_device *pdev;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_region *cg;
+#endif
+};
+
+struct coherent_heap_buffer {
+	struct coherent_heap *heap;
+	struct list_head attachments;
+	struct mutex lock;
+	unsigned long len;
+	dma_addr_t dma_addr;
+	void *alloc_vaddr;
+	struct page **pages;
+	pgoff_t pagecount;
+	int vmap_cnt;
+	void *vaddr;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_pool_state *pool;
+#endif
+};
+
+struct dma_heap_attachment {
+	struct device *dev;
+	struct sg_table table;
+	struct list_head list;
+	bool mapped;
+};
+
+static int coherent_heap_attach(struct dma_buf *dmabuf,
+				struct dma_buf_attachment *attachment)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+	int ret;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
+					buffer->pagecount, 0,
+					buffer->pagecount << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		kfree(a);
+		return ret;
+	}
+
+	a->dev = attachment->dev;
+	INIT_LIST_HEAD(&a->list);
+	a->mapped = false;
+
+	attachment->priv = a;
+
+	mutex_lock(&buffer->lock);
+	list_add(&a->list, &buffer->attachments);
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static void coherent_heap_detach(struct dma_buf *dmabuf,
+				 struct dma_buf_attachment *attachment)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a = attachment->priv;
+
+	mutex_lock(&buffer->lock);
+	list_del(&a->list);
+	mutex_unlock(&buffer->lock);
+
+	sg_free_table(&a->table);
+	kfree(a);
+}
+
+static struct sg_table *coherent_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+						  enum dma_data_direction direction)
+{
+	struct dma_heap_attachment *a = attachment->priv;
+	struct sg_table *table = &a->table;
+	int ret;
+
+	ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+	if (ret)
+		return ERR_PTR(ret);
+	a->mapped = true;
+
+	return table;
+}
+
+static void coherent_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+					struct sg_table *table,
+					enum dma_data_direction direction)
+{
+	struct dma_heap_attachment *a = attachment->priv;
+
+	a->mapped = false;
+	dma_unmap_sgtable(attachment->dev, table, direction, 0);
+}
+
+static int coherent_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+						  enum dma_data_direction direction)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt)
+		invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		if (!a->mapped)
+			continue;
+		dma_sync_sgtable_for_cpu(a->dev, &a->table, direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static int coherent_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+						enum dma_data_direction direction)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct dma_heap_attachment *a;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt)
+		flush_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+	list_for_each_entry(a, &buffer->attachments, list) {
+		if (!a->mapped)
+			continue;
+		dma_sync_sgtable_for_device(a->dev, &a->table, direction);
+	}
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static int coherent_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct coherent_heap *coh_heap = buffer->heap;
+
+	return dma_mmap_coherent(coh_heap->dev, vma, buffer->alloc_vaddr,
+				 buffer->dma_addr, buffer->len);
+}
+
+static void *coherent_heap_do_vmap(struct coherent_heap_buffer *buffer)
+{
+	void *vaddr;
+
+	vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
+	if (!vaddr)
+		return ERR_PTR(-ENOMEM);
+
+	return vaddr;
+}
+
+static int coherent_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	void *vaddr;
+	int ret = 0;
+
+	mutex_lock(&buffer->lock);
+	if (buffer->vmap_cnt) {
+		buffer->vmap_cnt++;
+		iosys_map_set_vaddr(map, buffer->vaddr);
+		goto out;
+	}
+
+	vaddr = coherent_heap_do_vmap(buffer);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto out;
+	}
+
+	buffer->vaddr = vaddr;
+	buffer->vmap_cnt++;
+	iosys_map_set_vaddr(map, buffer->vaddr);
+out:
+	mutex_unlock(&buffer->lock);
+
+	return ret;
+}
+
+static void coherent_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	if (!--buffer->vmap_cnt) {
+		vunmap(buffer->vaddr);
+		buffer->vaddr = NULL;
+	}
+	mutex_unlock(&buffer->lock);
+	iosys_map_clear(map);
+}
+
+static void coherent_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct coherent_heap_buffer *buffer = dmabuf->priv;
+	struct coherent_heap *coh_heap = buffer->heap;
+
+	if (buffer->vmap_cnt > 0) {
+		WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
+		vunmap(buffer->vaddr);
+		buffer->vaddr = NULL;
+		buffer->vmap_cnt = 0;
+	}
+
+	if (buffer->alloc_vaddr)
+		dma_free_coherent(coh_heap->dev, buffer->len, buffer->alloc_vaddr,
+			       buffer->dma_addr);
+	kfree(buffer->pages);
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	dmem_cgroup_uncharge(buffer->pool, buffer->len);
+#endif
+	kfree(buffer);
+}
+
+static const struct dma_buf_ops coherent_heap_buf_ops = {
+	.attach = coherent_heap_attach,
+	.detach = coherent_heap_detach,
+	.map_dma_buf = coherent_heap_map_dma_buf,
+	.unmap_dma_buf = coherent_heap_unmap_dma_buf,
+	.begin_cpu_access = coherent_heap_dma_buf_begin_cpu_access,
+	.end_cpu_access = coherent_heap_dma_buf_end_cpu_access,
+	.mmap = coherent_heap_mmap,
+	.vmap = coherent_heap_vmap,
+	.vunmap = coherent_heap_vunmap,
+	.release = coherent_heap_dma_buf_release,
+};
+
+static struct dma_buf *coherent_heap_allocate(struct dma_heap *heap,
+					      unsigned long len,
+					      u32 fd_flags,
+					      u64 heap_flags)
+{
+	struct coherent_heap *coh_heap;
+	struct coherent_heap_buffer *buffer;
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	size_t size = PAGE_ALIGN(len);
+	pgoff_t pagecount = size >> PAGE_SHIFT;
+	struct dma_buf *dmabuf;
+	int ret = -ENOMEM;
+	pgoff_t pg;
+
+	coh_heap = dma_heap_get_drvdata(heap);
+	if (!coh_heap)
+		return ERR_PTR(-EINVAL);
+	if (!coh_heap->dev)
+		return ERR_PTR(-ENODEV);
+
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&buffer->attachments);
+	mutex_init(&buffer->lock);
+	buffer->len = size;
+	buffer->heap = coh_heap;
+	buffer->pagecount = pagecount;
+
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	if (mem_accounting) {
+		ret = dmem_cgroup_try_charge(coh_heap->cg, size,
+					     &buffer->pool, NULL);
+		if (ret)
+			goto free_buffer;
+	}
+#endif
+
+	buffer->alloc_vaddr = dma_alloc_coherent(coh_heap->dev, buffer->len,
+						 &buffer->dma_addr, GFP_KERNEL);
+	if (!buffer->alloc_vaddr) {
+		ret = -ENOMEM;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+		goto uncharge_cgroup;
+#else
+		goto free_buffer;
+#endif
+	}
+
+	buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages),
+				      GFP_KERNEL);
+	if (!buffer->pages) {
+		ret = -ENOMEM;
+		goto free_dma;
+	}
+
+	for (pg = 0; pg < pagecount; pg++)
+		buffer->pages[pg] = virt_to_page((char *)buffer->alloc_vaddr +
+						 (pg * PAGE_SIZE));
+
+	/* create the dmabuf */
+	exp_info.exp_name = dma_heap_get_name(heap);
+	exp_info.ops = &coherent_heap_buf_ops;
+	exp_info.size = buffer->len;
+	exp_info.flags = fd_flags;
+	exp_info.priv = buffer;
+	dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto free_pages;
+	}
+	return dmabuf;
+
+free_pages:
+	kfree(buffer->pages);
+free_dma:
+	dma_free_coherent(coh_heap->dev, buffer->len, buffer->alloc_vaddr,
+			  buffer->dma_addr);
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+uncharge_cgroup:
+	dmem_cgroup_uncharge(buffer->pool, size);
+#endif
+free_buffer:
+	kfree(buffer);
+	return ERR_PTR(ret);
+}
+
+static const struct dma_heap_ops coherent_heap_ops = {
+	.allocate = coherent_heap_allocate,
+};
+
+static int __coherent_heap_register(struct reserved_mem *rmem)
+{
+	struct dma_heap_export_info exp_info;
+	struct coherent_heap *coh_heap;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	struct dmem_cgroup_region *region;
+#endif
+	const char *rmem_name;
+	int ret;
+
+	if (!rmem)
+		return -EINVAL;
+
+	rmem_name = rmem->name ? rmem->name : "unknown";
+
+	coh_heap = kzalloc(sizeof(*coh_heap), GFP_KERNEL);
+	if (!coh_heap)
+		return -ENOMEM;
+
+	coh_heap->name = kasprintf(GFP_KERNEL, "coherent_%s", rmem_name);
+	if (!coh_heap->name) {
+		ret = -ENOMEM;
+		goto free_coherent_heap;
+	}
+
+	coh_heap->rmem = rmem;
+
+	/* create a platform device per rmem and bind it */
+	coh_heap->pdev = platform_device_register_simple("coherent-heap",
+							 PLATFORM_DEVID_AUTO,
+							 NULL, 0);
+	if (IS_ERR(coh_heap->pdev)) {
+		ret = PTR_ERR(coh_heap->pdev);
+		goto free_name;
+	}
+
+	if (rmem->ops && rmem->ops->device_init) {
+		ret = rmem->ops->device_init(rmem, &coh_heap->pdev->dev);
+		if (ret)
+			goto pdev_unregister;
+	}
+
+	coh_heap->dev = &coh_heap->pdev->dev;
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	region = dmem_cgroup_register_region(rmem->size, "coh/%s", rmem_name);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		goto pdev_unregister;
+	}
+	coh_heap->cg = region;
+#endif
+
+	exp_info.name = coh_heap->name;
+	exp_info.ops = &coherent_heap_ops;
+	exp_info.priv = coh_heap;
+
+	coh_heap->heap = dma_heap_add(&exp_info);
+	if (IS_ERR(coh_heap->heap)) {
+		ret = PTR_ERR(coh_heap->heap);
+		goto cg_unregister;
+	}
+
+	return 0;
+
+cg_unregister:
+#if IS_ENABLED(CONFIG_CGROUP_DMEM)
+	dmem_cgroup_unregister_region(coh_heap->cg);
+#endif
+pdev_unregister:
+	platform_device_unregister(coh_heap->pdev);
+	coh_heap->pdev = NULL;
+free_name:
+	kfree(coh_heap->name);
+free_coherent_heap:
+	kfree(coh_heap);
+
+	return ret;
+}
+
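+/*
+ * Called from rmem_dma_setup() during early boot; on -ENOMEM (allocators
+ * not ready yet), defer the region and retry at late_initcall time.
+ */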
+int dma_heap_coherent_register(struct reserved_mem *rmem)
+{
+	int ret;
+
+	ret = __coherent_heap_register(rmem);
+	if (ret == -ENOMEM)
+		return coherent_heap_add_deferred(rmem);
+	return ret;
+}
+
+static int __init coherent_heap_register_deferred(void)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < rmem_areas_deferred_num; i++) {
+		struct reserved_mem *rmem = rmem_areas_deferred[i];
+
+		ret = __coherent_heap_register(rmem);
+		if (ret) {
+			pr_warn("Failed to add coherent heap %s",
+				rmem->name ? rmem->name : "unknown");
+			continue;
+		}
+	}
+
+	return 0;
+}
+late_initcall(coherent_heap_register_deferred);
+MODULE_DESCRIPTION("DMA-BUF heap for coherent reserved-memory regions");
diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h
index 648328a64b27e..e894cfa1ecf1a 100644
--- a/include/linux/dma-heap.h
+++ b/include/linux/dma-heap.h
@@ -9,9 +9,11 @@
 #ifndef _DMA_HEAPS_H
 #define _DMA_HEAPS_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 
 struct dma_heap;
+struct reserved_mem;
 
 /**
  * struct dma_heap_ops - ops to operate on a given heap
@@ -48,4 +50,13 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info);
 
 extern bool mem_accounting;
 
+#if IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT)
+int dma_heap_coherent_register(struct reserved_mem *rmem);
+#else
+static inline int dma_heap_coherent_register(struct reserved_mem *rmem)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 #endif /* _DMA_HEAPS_H */
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 1147497bc512c..f49d13e460e4b 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/dma-direct.h>
 #include <linux/dma-map-ops.h>
+#include <linux/dma-heap.h>
 
 struct dma_coherent_mem {
 	void		*virt_base;
@@ -393,6 +394,14 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
 	rmem->ops = &rmem_dma_ops;
 	pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
 		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+
+	if (IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT)) {
+		int ret = dma_heap_coherent_register(rmem);
+
+		if (ret)
+			pr_warn("Reserved memory: failed to register coherent heap for %s (%d)\n",
+				rmem->name ? rmem->name : "unknown", ret);
+	}
 	return 0;
 }
 

---
base-commit: 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f
change-id: 20260223-b4-dmabuf-heap-coherent-rmem-91fd3926afe9

Best regards,
-- 
Albert Esteve <aesteve@redhat.com>


Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
2026-02-24  7:57 [PATCH] dma-buf: heaps: Add Coherent heap to dmabuf heaps Albert Esteve
2026-02-26 10:12 ` Maxime Ripard
2026-02-26 15:22 ` Robin Murphy
2026-02-27  5:38 ` Claude review: " Claude Code Review Bot
2026-02-27  5:38 ` Claude Code Review Bot
