On 2/11/2026 11:01 AM, Sanjay Yadav wrote: > Add missing kernel-doc for GPU buddy allocator flags, > gpu_buddy_block, and gpu_buddy. The documentation covers block > header fields, allocator roots, free trees, and allocation flags > such as RANGE, TOPDOWN, CONTIGUOUS, CLEAR, and TRIM_DISABLE. > Private members are marked with kernel-doc private markers > and documented with regular comments. > > No functional changes. > > v2: > - Corrected GPU_BUDDY_CLEAR_TREE and GPU_BUDDY_DIRTY_TREE index values (Arun) > - Rebased after DRM buddy allocator moved to drivers/gpu/ > - Updated commit message > > Cc: Christian König > Cc: Arunpravin Paneer Selvam > Suggested-by: Matthew Auld > Signed-off-by: Sanjay Yadav > --- > include/linux/gpu_buddy.h | 122 +++++++++++++++++++++++++++++++------- > 1 file changed, 102 insertions(+), 20 deletions(-) > > diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h > index 07ac65db6d2e..b06241c78437 100644 > --- a/include/linux/gpu_buddy.h > +++ b/include/linux/gpu_buddy.h > @@ -12,11 +12,58 @@ > #include > #include > > +/** > + * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range > + * > + * When set, allocation is restricted to the range [start, end) specified > + * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored > + * and allocation can use any free space. > + */ > #define GPU_BUDDY_RANGE_ALLOCATION BIT(0) > + > +/** > + * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space > + * > + * Allocate starting from high addresses and working down. Useful for > + * separating different allocation types (e.g., kernel vs userspace) > + * to reduce fragmentation. > + */ > #define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) > + > +/** > + * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks > + * > + * The allocation must be satisfied with a single contiguous block. > + * If the requested size cannot be allocated contiguously, the > + * allocation fails with -ENOSPC. 
> + */ > #define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) > + > +/** > + * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory > + * > + * Attempt to allocate from the clear tree first. If insufficient clear > + * memory is available, falls back to dirty memory. Useful when the > + * caller needs zeroed memory and wants to avoid GPU clear operations. > + */ > #define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) > + > +/** > + * GPU_BUDDY_CLEARED - Mark returned blocks as cleared > + * > + * Used with gpu_buddy_free_list() to indicate that the memory being > + * freed has been cleared (zeroed). The blocks will be placed in the > + * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests. > + */ > #define GPU_BUDDY_CLEARED BIT(4) > + > +/** > + * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming > + * > + * By default, if an allocation is smaller than the allocated block, > + * excess memory is trimmed and returned to the free pool. This flag > + * disables trimming, keeping the full power-of-two block size. > + */ > #define GPU_BUDDY_TRIM_DISABLE BIT(5) > > enum gpu_buddy_free_tree { > @@ -28,7 +75,27 @@ enum gpu_buddy_free_tree { > #define for_each_free_tree(tree) \ > for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) > > +/** > + * struct gpu_buddy_block - Block within a buddy allocator > + * > + * Each block in the buddy allocator is represented by this structure. > + * Blocks are organized in a binary tree where each parent block can be > + * split into two children (left and right buddies). The allocator manages > + * blocks at various orders (power-of-2 sizes) from chunk_size up to the > + * largest contiguous region. 
> + * > + * @private: Private data owned by the allocator user (e.g., driver-specific data) > + * @link: List node for user ownership while block is allocated > + */ > struct gpu_buddy_block { > +/* private: */ > + /* > + * Header bit layout: > + * - Bits 63:12: block offset within the address space > + * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) > + * - Bit 9: clear bit (1 if memory is zeroed) I think we should add "- Bits 8:6: reserved" here. Apart from that, looks good to me. Reviewed-by: Arunpravin Paneer Selvam > + * - Bits 5:0: order (log2 of size relative to chunk_size) > + */ > #define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) > #define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) > #define GPU_BUDDY_ALLOCATED (1 << 10) > @@ -43,7 +110,7 @@ struct gpu_buddy_block { > struct gpu_buddy_block *left; > struct gpu_buddy_block *right; > struct gpu_buddy_block *parent; > - > +/* public: */ > void *private; /* owned by creator */ > > /* > @@ -53,43 +120,58 @@ struct gpu_buddy_block { > * gpu_buddy_free* ownership is given back to the mm. > */ > union { > +/* private: */ > struct rb_node rb; > +/* public: */ > struct list_head link; > }; > - > +/* private: */ > struct list_head tmp_link; > }; > > /* Order-zero must be at least SZ_4K */ > #define GPU_BUDDY_MAX_ORDER (63 - 12) > > -/* > - * Binary Buddy System. > +/** > + * struct gpu_buddy - GPU binary buddy allocator > + * > + * The buddy allocator provides efficient power-of-two memory allocation > + * with fast allocation and free operations. It is commonly used for GPU > + * memory management where allocations can be split into power-of-two > + * block sizes. > * > - * Locking should be handled by the user, a simple mutex around > - * gpu_buddy_alloc* and gpu_buddy_free* should suffice. > + * Locking should be handled by the user; a simple mutex around > + * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() > + * should suffice. > + * > + * @n_roots: Number of root blocks in the roots array.
> + * @max_order: Maximum block order (log2 of largest block size / chunk_size). > + * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. > + * @size: Total size of the address space managed by this allocator in bytes. > + * @avail: Total free space currently available for allocation in bytes. > + * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. > + * This is a subset of @avail. > */ > struct gpu_buddy { > - /* Maintain a free list for each order. */ > - struct rb_root **free_trees; > - > +/* private: */ > /* > - * Maintain explicit binary tree(s) to track the allocation of the > - * address space. This gives us a simple way of finding a buddy block > - * and performing the potentially recursive merge step when freeing a > - * block. Nodes are either allocated or free, in which case they will > - * also exist on the respective free list. > + * Array of red-black trees for free block management. > + * Indexed as free_trees[clear/dirty][order] where: > + * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content > + * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content > + * Each tree holds free blocks of the corresponding order. > */ > - struct gpu_buddy_block **roots; > - > + struct rb_root **free_trees; > /* > - * Anything from here is public, and remains static for the lifetime of > - * the mm. Everything above is considered do-not-touch. > + * Array of root blocks representing the top-level blocks of the > + * binary tree(s). Multiple roots exist when the total size is not > + * a power of two, with each root being the largest power-of-two > + * that fits in the remaining space. > */ > + struct gpu_buddy_block **roots; > +/* public: */ > unsigned int n_roots; > unsigned int max_order; > - > - /* Must be at least SZ_4K */ > u64 chunk_size; > u64 size; > u64 avail;