From 0bf37f45d5c472aebdf32da64775cac1110c085c Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Mon, 15 Sep 2025 12:34:37 +0200 Subject: accel/ivpu: Add support for user-managed preemption buffer Allow user mode drivers to manage preemption buffers, enabling memory savings by sharing a single buffer across multiple command queues within the same memory context. Introduce DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE to report the required preemption buffer size as specified by the firmware. The preemption buffer is now passed from user space as an entry in the BO list of DRM_IVPU_CMDQ_SUBMIT. The buffer must be non-mappable and large enough to hold preemption data. For backward compatibility, the kernel will allocate an internal preemption buffer if user space does not provide one. User space can only provide a single preemption buffer, simplifying the ioctl interface and parameter validation. A separate secondary preemption buffer is only needed to save below 4GB address space on 37xx and only if preemption buffers are not shared. 
Signed-off-by: Andrzej Kacprowski Reviewed-by: Lizhi Hou Signed-off-by: Karol Wachowski Link: https://lore.kernel.org/r/20250915103437.830086-1-karol.wachowski@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 160ee1411d4a..e470b0221e02 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -90,6 +90,7 @@ extern "C" { #define DRM_IVPU_PARAM_TILE_CONFIG 11 #define DRM_IVPU_PARAM_SKU 12 #define DRM_IVPU_PARAM_CAPABILITIES 13 +#define DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE 14 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -176,6 +177,9 @@ struct drm_ivpu_param { * * %DRM_IVPU_PARAM_CAPABILITIES: * Supported capabilities (read-only) + * + * %DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE: + * Size of the preemption buffer (read-only) */ __u32 param; @@ -371,6 +375,13 @@ struct drm_ivpu_cmdq_submit { * to be executed. The offset has to be 8-byte aligned. */ __u32 commands_offset; + /** + * @preempt_buffer_index: + * + * Index of the preemption buffer in the buffers_ptr array. + */ + __u32 preempt_buffer_index; + __u32 reserved; }; /* drm_ivpu_bo_wait job status codes */ -- cgit v1.2.3 From b060004f06ae0a3064bddb87a3f8ad13f859fcf3 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Sep 2025 20:18:37 +0100 Subject: drm/panfrost: Introduce uAPI for JM context creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new uAPI lets user space query the KM driver for the available priorities a job can be given at submit time. These are managed through the notion of a context, for which we also provide new creation and destruction ioctls. 
Reviewed-by: Steven Price Signed-off-by: Boris Brezillon Signed-off-by: Adrián Larumbe Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20250917191859.500279-2-adrian.larumbe@collabora.com --- include/uapi/drm/panfrost_drm.h | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index ed67510395bd..e8b47c9f6976 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -22,6 +22,8 @@ extern "C" { #define DRM_PANFROST_PERFCNT_DUMP 0x07 #define DRM_PANFROST_MADVISE 0x08 #define DRM_PANFROST_SET_LABEL_BO 0x09 +#define DRM_PANFROST_JM_CTX_CREATE 0x0a +#define DRM_PANFROST_JM_CTX_DESTROY 0x0b #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -31,6 +33,8 @@ extern "C" { #define DRM_IOCTL_PANFROST_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset) #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) +#define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) +#define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -71,6 +75,12 @@ struct drm_panfrost_submit { /** A combination of PANFROST_JD_REQ_* */ __u32 requirements; + + /** JM context handle. Zero if you want to use the default context. */ + __u32 jm_ctx_handle; + + /** Padding field. MBZ. 
*/ + __u32 pad; }; /** @@ -177,6 +187,7 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_AFBC_FEATURES, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, + DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES, }; struct drm_panfrost_get_param { @@ -299,6 +310,45 @@ struct panfrost_dump_registers { __u32 value; }; +enum drm_panfrost_jm_ctx_priority { + /** + * @PANFROST_JM_CTX_PRIORITY_LOW: Low priority context. + */ + PANFROST_JM_CTX_PRIORITY_LOW = 0, + + /** + * @PANFROST_JM_CTX_PRIORITY_MEDIUM: Medium priority context. + */ + PANFROST_JM_CTX_PRIORITY_MEDIUM, + + /** + * @PANFROST_JM_CTX_PRIORITY_HIGH: High priority context. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANFROST_JM_CTX_PRIORITY_HIGH, +}; + +struct drm_panfrost_jm_ctx_create { + /** @handle: Handle of the created JM context */ + __u32 handle; + + /** @priority: Context priority (see enum drm_panfrost_jm_ctx_priority). */ + __u32 priority; +}; + +struct drm_panfrost_jm_ctx_destroy { + /** + * @handle: Handle of the JM context to destroy. + * + * Must be a valid context handle returned by DRM_IOCTL_PANFROST_JM_CTX_CREATE. + */ + __u32 handle; + + /** @pad: Padding field, MBZ. */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From fb24aaf5415cc686fb0473eb782a7c8a7bab0469 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 21 Aug 2025 10:17:09 +0200 Subject: drm/dumb-buffers: Provide helper to set pitch and size Add drm_mode_size_dumb(), a helper to calculate the dumb-buffer scanline pitch and allocation size. Implementations of struct drm_driver.dumb_create can call the new helper for their size computations. There is currently quite a bit of code duplication among DRM's memory managers. Each calculates scanline pitch and buffer size from the given arguments, but the implementations are inconsistent in how they treat alignment and format support. Later patches will unify this code on top of drm_mode_size_dumb() as much as possible. 
drm_mode_size_dumb() uses existing 4CC format helpers to interpret the given color mode. This makes the dumb-buffer interface behave similar the kernel's video= parameter. Current per-driver implementations again likely have subtle differences or bugs in how they support color modes. The dumb-buffer UAPI is only specified for known color modes. These values describe linear, single-plane RGB color formats or legacy index formats. Other values should not be specified. But some user space still does. So for unknown color modes, there are a number of known exceptions for which drm_mode_size_dumb() calculates the pitch from the bpp value, as before. All other values work the same but print an error. v6: - document additional use cases for DUMB_CREATE2 in TODO list (Tomi) - fix typos in documentation (Tomi) v5: - check for overflows with check_mul_overflow() (Tomi) v4: - use %u conversion specifier (Geert) - list DRM_FORMAT_Dn in UAPI docs (Geert) - avoid dmesg spamming with drm_warn_once() (Sima) - add more information about bpp special case (Sima) - clarify parameters for hardware alignment - add a TODO item for DUMB_CREATE2 v3: - document the UAPI semantics - compute scanline pitch from for unknown color modes (Andy, Tomi) Signed-off-by: Thomas Zimmermann Reviewed-by: Tomi Valkeinen Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20250821081918.79786-3-tzimmermann@suse.de --- Documentation/gpu/todo.rst | 37 +++++++++++ drivers/gpu/drm/drm_dumb_buffers.c | 130 +++++++++++++++++++++++++++++++++++++ include/drm/drm_dumb_buffers.h | 14 ++++ include/uapi/drm/drm_mode.h | 50 +++++++++++++- 4 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 include/drm/drm_dumb_buffers.h (limited to 'include/uapi') diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 92db80793bba..98ed38241dc6 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -648,6 +648,43 @@ Contact: Thomas Zimmermann , Simona Vetter Level: 
Advanced +Implement a new DUMB_CREATE2 ioctl +---------------------------------- + +The current DUMB_CREATE ioctl is not well defined. Instead of a pixel and +framebuffer format, it only accepts a color mode of vague semantics. Assuming +a linear framebuffer, the color mode gives an idea of the supported pixel +format. But userspace effectively has to guess the correct values. It really +only works reliably with framebuffers in XRGB8888. Userspace has begun to +work around these limitations by computing arbitrary format's buffer sizes and +calculating their sizes in terms of XRGB8888 pixels. + +One possible solution is a new ioctl DUMB_CREATE2. It should accept a DRM +format and a format modifier to resolve the color mode's ambiguity. As +framebuffers can be multi-planar, the new ioctl has to return the buffer size, +pitch and GEM handle for each individual color plane. + +In the first step, the new ioctl can be limited to the current features of +the existing DUMB_CREATE. Individual drivers can then be extended to support +multi-planar formats. Rockchip might require this and would be a good candidate. + +It might also be helpful to userspace to query information about the size of +a potential buffer, if allocated. Userspace would supply geometry and format; +the kernel would return minimal allocation sizes and scanline pitch. There is +interest to allocate that memory from another device and provide it to the +DRM driver (say via dma-buf). + +Another requested feature is the ability to allocate a buffer by size, without +format. Accelerators use this for their buffer allocation and it could likely be +generalized. + +In addition to the kernel implementation, there must be user-space support +for the new ioctl. There's code in Mesa that might be able to use the new +call. 
+ +Contact: Thomas Zimmermann + +Level: Advanced Better Testing ============== diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c index 9916aaf5b3f2..e9eed9a5b760 100644 --- a/drivers/gpu/drm/drm_dumb_buffers.c +++ b/drivers/gpu/drm/drm_dumb_buffers.c @@ -25,6 +25,8 @@ #include #include +#include +#include #include #include @@ -57,6 +59,134 @@ * a hardware-specific ioctl to allocate suitable buffer objects. */ +static int drm_mode_align_dumb(struct drm_mode_create_dumb *args, + unsigned long hw_pitch_align, + unsigned long hw_size_align) +{ + u32 pitch = args->pitch; + u32 size; + + if (!pitch) + return -EINVAL; + + if (hw_pitch_align) + pitch = roundup(pitch, hw_pitch_align); + + if (!hw_size_align) + hw_size_align = PAGE_SIZE; + else if (!IS_ALIGNED(hw_size_align, PAGE_SIZE)) + return -EINVAL; /* TODO: handle this if necessary */ + + if (check_mul_overflow(args->height, pitch, &size)) + return -EINVAL; + size = ALIGN(size, hw_size_align); + if (!size) + return -EINVAL; + + args->pitch = pitch; + args->size = size; + + return 0; +} + +/** + * drm_mode_size_dumb - Calculates the scanline and buffer sizes for dumb buffers + * @dev: DRM device + * @args: Parameters for the dumb buffer + * @hw_pitch_align: Hardware scanline alignment in bytes + * @hw_size_align: Hardware buffer-size alignment in bytes + * + * The helper drm_mode_size_dumb() calculates the size of the buffer + * allocation and the scanline size for a dumb buffer. Callers have to + * set the buffer's width, height and color mode in the argument @args. + * The helper validates the correctness of the input and tests for + * possible overflows. If successful, it returns the dumb buffer's + * required scanline pitch and size in &args. + * + * The parameter @hw_pitch_align allows the driver to specify an + * alignment for the scanline pitch, if the hardware requires any. The + * calculated pitch will be a multiple of the alignment. 
The parameter + * @hw_size_align allows to specify an alignment for buffer sizes. The + * provided alignment should represent requirements of the graphics + * hardware. drm_mode_size_dumb() handles GEM-related constraints + * automatically across all drivers and hardware. For example, the + * returned buffer size is always a multiple of PAGE_SIZE, which is + * required by mmap(). + * + * Returns: + * Zero on success, or a negative error code otherwise. + */ +int drm_mode_size_dumb(struct drm_device *dev, + struct drm_mode_create_dumb *args, + unsigned long hw_pitch_align, + unsigned long hw_size_align) +{ + u64 pitch = 0; + u32 fourcc; + + /* + * The scanline pitch depends on the buffer width and the color + * format. The latter is specified as a color-mode constant for + * which we first have to find the corresponding color format. + * + * Different color formats can have the same color-mode constant. + * For example XRGB8888 and BGRX8888 both have a color mode of 32. + * It is possible to use different formats for dumb-buffer allocation + * and rendering as long as all involved formats share the same + * color-mode constant. + */ + fourcc = drm_driver_color_mode_format(dev, args->bpp); + if (fourcc != DRM_FORMAT_INVALID) { + const struct drm_format_info *info = drm_format_info(fourcc); + + if (!info) + return -EINVAL; + pitch = drm_format_info_min_pitch(info, 0, args->width); + } else if (args->bpp) { + /* + * Some userspace throws in arbitrary values for bpp and + * relies on the kernel to figure it out. In this case we + * fall back to the old method of using bpp directly. The + * over-commitment of memory from the rounding is acceptable + * for compatibility with legacy userspace. We have a number + * of deprecated legacy values that are explicitly supported. 
+ */ + switch (args->bpp) { + default: + drm_warn_once(dev, + "Unknown color mode %u; guessing buffer size.\n", + args->bpp); + fallthrough; + /* + * These constants represent various YUV formats supported by + * drm_gem_afbc_get_bpp(). + */ + case 12: // DRM_FORMAT_YUV420_8BIT + case 15: // DRM_FORMAT_YUV420_10BIT + case 30: // DRM_FORMAT_VUY101010 + fallthrough; + /* + * Used by Mesa and Gstreamer to allocate NV formats and others + * as RGB buffers. Technically, XRGB16161616F formats are RGB, + * but the dumb buffers are not supposed to be used for anything + * beyond 32 bits per pixels. + */ + case 10: // DRM_FORMAT_NV{15,20,30}, DRM_FORMAT_P010 + case 64: // DRM_FORMAT_{XRGB,XBGR,ARGB,ABGR}16161616F + pitch = args->width * DIV_ROUND_UP(args->bpp, SZ_8); + break; + } + } + + if (!pitch || pitch > U32_MAX) + return -EINVAL; + + args->pitch = pitch; + + return drm_mode_align_dumb(args, hw_pitch_align, hw_size_align); +} +EXPORT_SYMBOL(drm_mode_size_dumb); + int drm_mode_create_dumb(struct drm_device *dev, struct drm_mode_create_dumb *args, struct drm_file *file_priv) diff --git a/include/drm/drm_dumb_buffers.h b/include/drm/drm_dumb_buffers.h new file mode 100644 index 000000000000..1f3a8236fb3d --- /dev/null +++ b/include/drm/drm_dumb_buffers.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __DRM_DUMB_BUFFERS_H__ +#define __DRM_DUMB_BUFFERS_H__ + +struct drm_device; +struct drm_mode_create_dumb; + +int drm_mode_size_dumb(struct drm_device *dev, + struct drm_mode_create_dumb *args, + unsigned long hw_pitch_align, + unsigned long hw_size_align); + +#endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a122bea25593..1e0e02a79b5c 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -1066,7 +1066,7 @@ struct drm_mode_crtc_page_flip_target { * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout. 
* @height: buffer height in pixels * @width: buffer width in pixels - * @bpp: bits per pixel + * @bpp: color mode * @flags: must be zero * @handle: buffer object handle * @pitch: number of bytes between two consecutive lines @@ -1074,6 +1074,54 @@ struct drm_mode_crtc_page_flip_target { * * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds, * the kernel fills @handle, @pitch and @size. + * + * The value of @bpp is a color-mode number describing a specific format + * or a variant thereof. The value often corresponds to the number of bits + * per pixel for most modes, although there are exceptions. Each color mode + * maps to a DRM format plus a number of modes with similar pixel layout. + * Framebuffer layout is always linear. + * + * Support for all modes and formats is optional. Even if dumb-buffer + * creation with a certain color mode succeeds, it is not guaranteed that + * the DRM driver supports any of the related formats. Most drivers support + * a color mode of 32 with a format of DRM_FORMAT_XRGB8888 on their primary + * plane. 
+ * + * +------------+------------------------+------------------------+ + * | Color mode | Framebuffer format | Compatible formats | + * +============+========================+========================+ + * | 32 | * DRM_FORMAT_XRGB8888 | * DRM_FORMAT_BGRX8888 | + * | | | * DRM_FORMAT_RGBX8888 | + * | | | * DRM_FORMAT_XBGR8888 | + * +------------+------------------------+------------------------+ + * | 24 | * DRM_FORMAT_RGB888 | * DRM_FORMAT_BGR888 | + * +------------+------------------------+------------------------+ + * | 16 | * DRM_FORMAT_RGB565 | * DRM_FORMAT_BGR565 | + * +------------+------------------------+------------------------+ + * | 15 | * DRM_FORMAT_XRGB1555 | * DRM_FORMAT_BGRX1555 | + * | | | * DRM_FORMAT_RGBX1555 | + * | | | * DRM_FORMAT_XBGR1555 | + * +------------+------------------------+------------------------+ + * | 8 | * DRM_FORMAT_C8 | * DRM_FORMAT_D8 | + * | | | * DRM_FORMAT_R8 | + * +------------+------------------------+------------------------+ + * | 4 | * DRM_FORMAT_C4 | * DRM_FORMAT_D4 | + * | | | * DRM_FORMAT_R4 | + * +------------+------------------------+------------------------+ + * | 2 | * DRM_FORMAT_C2 | * DRM_FORMAT_D2 | + * | | | * DRM_FORMAT_R2 | + * +------------+------------------------+------------------------+ + * | 1 | * DRM_FORMAT_C1 | * DRM_FORMAT_D1 | + * | | | * DRM_FORMAT_R1 | + * +------------+------------------------+------------------------+ + * + * Color modes of 10, 12, 15, 30 and 64 are only supported for use by + * legacy user space. Please don't use them in new code. Other modes + * are not supported. + * + * Do not attempt to allocate anything but linear framebuffer memory + * with single-plane RGB data. Allocation of other framebuffer + * layouts requires dedicated ioctls in the respective DRM driver. 
*/ struct drm_mode_create_dumb { __u32 height; -- cgit v1.2.3 From 409b9499099b4ad14ca60b59c6edfebfaf74f907 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Tue, 14 Oct 2025 19:58:24 +0530 Subject: drm/xe/uapi: Add documentation for DRM_XE_GEM_CREATE_FLAG_SCANOUT Add documentation for drm_xe_gem_create structure flag DRM_XE_GEM_CREATE_FLAG_SCANOUT. Signed-off-by: Sanjay Yadav Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://lore.kernel.org/r/20251014142823.3701228-2-sanjay.kumar.yadav@intel.com --- include/uapi/drm/xe_drm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..2d7945cda739 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -771,7 +771,11 @@ struct drm_xe_device_query { * until the object is either bound to a virtual memory region via * VM_BIND or accessed by the CPU. As a result, no backing memory is * reserved at the time of GEM object creation. - * - %DRM_XE_GEM_CREATE_FLAG_SCANOUT + * - %DRM_XE_GEM_CREATE_FLAG_SCANOUT - Indicates that the GEM object is + * intended for scanout via the display engine. When set, kernel ensures + * that the allocation is placed in a memory region compatible with the + * display engine requirements. This may impose restrictions on tiling, + * alignment, and memory placement to guarantee proper display functionality. * - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a * possible placement, ensure that the corresponding VRAM allocation * will always use the CPU accessible part of VRAM. This is important -- cgit v1.2.3 From b291e4f1a4951204ce858cd01801291d34962a33 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 14 Oct 2025 16:41:19 -0700 Subject: accel/amdxdna: Support getting last hardware error Add new parameter DRM_AMDXDNA_HW_LAST_ASYNC_ERR to get array IOCTL. 
When hardware reports an error, the driver save the error information and timestamp. This new get array parameter retrieves the last error. Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20251014234119.628453-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_error.c | 95 +++++++++++++++++++++++++++------ drivers/accel/amdxdna/aie2_pci.c | 3 ++ drivers/accel/amdxdna/aie2_pci.h | 5 +- drivers/accel/amdxdna/amdxdna_error.h | 59 ++++++++++++++++++++ drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +- include/uapi/drm/amdxdna_accel.h | 13 +++++ 6 files changed, 159 insertions(+), 19 deletions(-) create mode 100644 drivers/accel/amdxdna/amdxdna_error.h (limited to 'include/uapi') diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c index 5ee905632a39..d452008ec4f4 100644 --- a/drivers/accel/amdxdna/aie2_error.c +++ b/drivers/accel/amdxdna/aie2_error.c @@ -13,6 +13,7 @@ #include "aie2_msg_priv.h" #include "aie2_pci.h" +#include "amdxdna_error.h" #include "amdxdna_mailbox.h" #include "amdxdna_pci_drv.h" @@ -46,6 +47,7 @@ enum aie_module_type { AIE_MEM_MOD = 0, AIE_CORE_MOD, AIE_PL_MOD, + AIE_UNKNOWN_MOD, }; enum aie_error_category { @@ -143,6 +145,31 @@ static const struct aie_event_category aie_ml_shim_tile_event_cat[] = { EVENT_CATEGORY(74U, AIE_ERROR_LOCK), }; +static const enum amdxdna_error_num aie_cat_err_num_map[] = { + [AIE_ERROR_SATURATION] = AMDXDNA_ERROR_NUM_AIE_SATURATION, + [AIE_ERROR_FP] = AMDXDNA_ERROR_NUM_AIE_FP, + [AIE_ERROR_STREAM] = AMDXDNA_ERROR_NUM_AIE_STREAM, + [AIE_ERROR_ACCESS] = AMDXDNA_ERROR_NUM_AIE_ACCESS, + [AIE_ERROR_BUS] = AMDXDNA_ERROR_NUM_AIE_BUS, + [AIE_ERROR_INSTRUCTION] = AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + [AIE_ERROR_ECC] = AMDXDNA_ERROR_NUM_AIE_ECC, + [AIE_ERROR_LOCK] = AMDXDNA_ERROR_NUM_AIE_LOCK, + [AIE_ERROR_DMA] = AMDXDNA_ERROR_NUM_AIE_DMA, + [AIE_ERROR_MEM_PARITY] = AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + [AIE_ERROR_UNKNOWN] = AMDXDNA_ERROR_NUM_UNKNOWN, +}; + 
+static_assert(ARRAY_SIZE(aie_cat_err_num_map) == AIE_ERROR_UNKNOWN + 1); + +static const enum amdxdna_error_module aie_err_mod_map[] = { + [AIE_MEM_MOD] = AMDXDNA_ERROR_MODULE_AIE_MEMORY, + [AIE_CORE_MOD] = AMDXDNA_ERROR_MODULE_AIE_CORE, + [AIE_PL_MOD] = AMDXDNA_ERROR_MODULE_AIE_PL, + [AIE_UNKNOWN_MOD] = AMDXDNA_ERROR_MODULE_UNKNOWN, +}; + +static_assert(ARRAY_SIZE(aie_err_mod_map) == AIE_UNKNOWN_MOD + 1); + static enum aie_error_category aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) { @@ -176,12 +203,40 @@ aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type) if (event_id != lut[i].event_id) continue; + if (lut[i].category > AIE_ERROR_UNKNOWN) + return AIE_ERROR_UNKNOWN; + return lut[i].category; } return AIE_ERROR_UNKNOWN; } +static void aie2_update_last_async_error(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) +{ + struct aie_error *errs = err_info; + enum amdxdna_error_module err_mod; + enum aie_error_category aie_err; + enum amdxdna_error_num err_num; + struct aie_error *last_err; + + last_err = &errs[num_err - 1]; + if (last_err->mod_type >= AIE_UNKNOWN_MOD) { + err_num = aie_cat_err_num_map[AIE_ERROR_UNKNOWN]; + err_mod = aie_err_mod_map[AIE_UNKNOWN_MOD]; + } else { + aie_err = aie_get_error_category(last_err->row, + last_err->event_id, + last_err->mod_type); + err_num = aie_cat_err_num_map[aie_err]; + err_mod = aie_err_mod_map[last_err->mod_type]; + } + + ndev->last_async_err.err_code = AMDXDNA_ERROR_ENCODE(err_num, err_mod); + ndev->last_async_err.ts_us = ktime_to_us(ktime_get_real()); + ndev->last_async_err.ex_err_code = AMDXDNA_EXTRA_ERR_ENCODE(last_err->row, last_err->col); +} + static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err) { struct aie_error *errs = err_info; @@ -264,29 +319,14 @@ static void aie2_error_worker(struct work_struct *err_work) } mutex_lock(&xdna->dev_lock); + aie2_update_last_async_error(e->ndev, info->payload, info->err_cnt); + /* 
Re-sent this event to firmware */ if (aie2_error_event_send(e)) XDNA_WARN(xdna, "Unable to register async event"); mutex_unlock(&xdna->dev_lock); } -int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev) -{ - struct amdxdna_dev *xdna = ndev->xdna; - struct async_event *e; - int i, ret; - - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); - for (i = 0; i < ndev->async_events->event_cnt; i++) { - e = &ndev->async_events->event[i]; - ret = aie2_error_event_send(e); - if (ret) - return ret; - } - - return 0; -} - void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev) { struct amdxdna_dev *xdna = ndev->xdna; @@ -341,6 +381,10 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) e->size = ASYNC_BUF_SIZE; e->resp.status = MAX_AIE2_STATUS_CODE; INIT_WORK(&e->work, aie2_error_worker); + + ret = aie2_error_event_send(e); + if (ret) + goto free_wq; } ndev->async_events = events; @@ -349,6 +393,8 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev) events->event_cnt, events->size); return 0; +free_wq: + destroy_workqueue(events->wq); free_buf: dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf, events->addr, DMA_FROM_DEVICE); @@ -356,3 +402,18 @@ free_events: kfree(events); return ret; } + +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, struct amdxdna_drm_get_array *args) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + args->num_element = 1; + args->element_size = sizeof(ndev->last_async_err); + if (copy_to_user(u64_to_user_ptr(args->buffer), + &ndev->last_async_err, args->element_size)) + return -EFAULT; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 8a66f276100e..cfca4e456b61 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -924,6 +924,9 @@ static int aie2_get_array(struct amdxdna_client *client, case DRM_AMDXDNA_HW_CONTEXT_ALL: ret = 
aie2_query_ctx_status_array(client, args); break; + case DRM_AMDXDNA_HW_LAST_ASYNC_ERR: + ret = aie2_get_array_async_error(xdna->dev_handle, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index 289a23ecd5f1..34bc35479f42 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -190,6 +190,8 @@ struct amdxdna_dev_hdl { enum aie2_dev_status dev_status; u32 hwctx_num; + + struct amdxdna_async_error last_async_err; }; #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ @@ -253,8 +255,9 @@ void aie2_psp_stop(struct psp_device *psp); /* aie2_error.c */ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); -int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev); int aie2_error_async_msg_thread(void *data); +int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, + struct amdxdna_drm_get_array *args); /* aie2_message.c */ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); diff --git a/drivers/accel/amdxdna/amdxdna_error.h b/drivers/accel/amdxdna/amdxdna_error.h new file mode 100644 index 000000000000..c51de86ec12b --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_error.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025, Advanced Micro Devices, Inc. 
+ */ + +#ifndef _AMDXDNA_ERROR_H_ +#define _AMDXDNA_ERROR_H_ + +#include +#include + +#define AMDXDNA_ERR_DRV_AIE 4 +#define AMDXDNA_ERR_SEV_CRITICAL 3 +#define AMDXDNA_ERR_CLASS_AIE 2 + +#define AMDXDNA_ERR_NUM_MASK GENMASK_U64(15, 0) +#define AMDXDNA_ERR_DRV_MASK GENMASK_U64(23, 16) +#define AMDXDNA_ERR_SEV_MASK GENMASK_U64(31, 24) +#define AMDXDNA_ERR_MOD_MASK GENMASK_U64(39, 32) +#define AMDXDNA_ERR_CLASS_MASK GENMASK_U64(47, 40) + +enum amdxdna_error_num { + AMDXDNA_ERROR_NUM_AIE_SATURATION = 3, + AMDXDNA_ERROR_NUM_AIE_FP, + AMDXDNA_ERROR_NUM_AIE_STREAM, + AMDXDNA_ERROR_NUM_AIE_ACCESS, + AMDXDNA_ERROR_NUM_AIE_BUS, + AMDXDNA_ERROR_NUM_AIE_INSTRUCTION, + AMDXDNA_ERROR_NUM_AIE_ECC, + AMDXDNA_ERROR_NUM_AIE_LOCK, + AMDXDNA_ERROR_NUM_AIE_DMA, + AMDXDNA_ERROR_NUM_AIE_MEM_PARITY, + AMDXDNA_ERROR_NUM_UNKNOWN = 15, +}; + +enum amdxdna_error_module { + AMDXDNA_ERROR_MODULE_AIE_CORE = 3, + AMDXDNA_ERROR_MODULE_AIE_MEMORY, + AMDXDNA_ERROR_MODULE_AIE_SHIM, + AMDXDNA_ERROR_MODULE_AIE_NOC, + AMDXDNA_ERROR_MODULE_AIE_PL, + AMDXDNA_ERROR_MODULE_UNKNOWN = 8, +}; + +#define AMDXDNA_ERROR_ENCODE(err_num, err_mod) \ + (FIELD_PREP(AMDXDNA_ERR_NUM_MASK, err_num) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_DRV_MASK, AMDXDNA_ERR_DRV_AIE) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_SEV_MASK, AMDXDNA_ERR_SEV_CRITICAL) | \ + FIELD_PREP(AMDXDNA_ERR_MOD_MASK, err_mod) | \ + FIELD_PREP_CONST(AMDXDNA_ERR_CLASS_MASK, AMDXDNA_ERR_CLASS_AIE)) + +#define AMDXDNA_EXTRA_ERR_COL_MASK GENMASK_U64(7, 0) +#define AMDXDNA_EXTRA_ERR_ROW_MASK GENMASK_U64(15, 8) + +#define AMDXDNA_EXTRA_ERR_ENCODE(row, col) \ + (FIELD_PREP(AMDXDNA_EXTRA_ERR_COL_MASK, col) | \ + FIELD_PREP(AMDXDNA_EXTRA_ERR_ROW_MASK, row)) + +#endif /* _AMDXDNA_ERROR_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index aa04452310e5..696fdac8ad3c 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -27,9 +27,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); 
/* * 0.0: Initial version * 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY + * 0.2: Support getting last hardware error */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 1 +#define AMDXDNA_DRIVER_MINOR 2 /* * Bind the driver base on (vendor_id, device_id) pair and later use the diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index a1fb9785db77..c7eec9ceb2ae 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -523,7 +523,20 @@ struct amdxdna_drm_hwctx_entry { __u32 pad; }; +/** + * struct amdxdna_async_error - XDNA async error structure + */ +struct amdxdna_async_error { + /** @err_code: Error code. */ + __u64 err_code; + /** @ts_us: Timestamp of the error, in microseconds. */ + __u64 ts_us; + /** @ex_err_code: Extra error code */ + __u64 ex_err_code; +}; + #define DRM_AMDXDNA_HW_CONTEXT_ALL 0 +#define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2 /** * struct amdxdna_drm_get_array - Get information array. -- cgit v1.2.3 From 59a2d3f38ab23cce4cd9f0c4a5e08fdfe9e67ae7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 15 Oct 2025 19:07:26 +0200 Subject: drm/xe/uapi: Hide the madvise autoreset behind a VM_BIND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The madvise implementation currently resets the SVM madvise if the underlying CPU map is unmapped. This is in an attempt to mimic the CPU madvise behaviour. However, it's not clear that this is a desired behaviour since if the end app user relies on it for malloc()ed objects or stack objects, it may not work as intended. Instead of having the autoreset functionality being a direct application-facing implicit UAPI, make the UMD explicitly choose this behaviour if it wants to expose it by introducing DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET, and add a semantics description. v2: - Kerneldoc fixes. Fix a commit log message. 
Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: "Falkowski, John" Cc: "Mrozek, Michal" Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015170726.178685-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_svm.c | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 12 +++++++++--- drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index da2a412f80c0..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -302,6 +302,11 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 if (!vma) return -EINVAL; + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + if (xe_vma_has_default_mem_attrs(vma)) return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c3230d3f9e6f..10d77666a425 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -646,7 +646,8 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, XE_VMA_READ_ONLY | \ XE_VMA_DUMPABLE | \ XE_VMA_SYSTEM_ALLOCATOR | \ - DRM_GPUVA_SPARSE) + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) @@ -2297,6 +2298,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ 
-3280,7 +3283,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3395,7 +3399,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a3b422b27ae8..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -46,6 +46,7 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) /** * struct xe_vma_mem_attr - memory attributes associated with vma diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2d7945cda739..47853659a705 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1017,6 +1017,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. 
+ * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. + * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1123,6 +1137,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From 5a5e9c0228e613f0ef2a58b9782d7c0ea8f1e58b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 20 Oct 2025 14:33:28 -0500 Subject: accel: Add Arm Ethos-U NPU driver Add a driver for Arm Ethos-U65/U85 NPUs. The Ethos-U NPU has a relatively simple interface with single command stream to describe buffers, operation settings, and network operations. It supports up to 8 memory regions (though no h/w bounds on a region). The Ethos NPUs are designed to use an SRAM for scratch memory. 
Region 2 is reserved for SRAM (like the downstream driver stack and compiler). Userspace doesn't need access to the SRAM. The h/w has no MMU nor external IOMMU and is a DMA engine which can read and write anywhere in memory without h/w bounds checks. The user submitted command streams must be validated against the bounds of the GEM BOs. This is similar to the VC4 design which validates shaders. The job submit is based on the rocket driver for the Rockchip NPU utilizing the GPU scheduler. It is simpler as there's only 1 core rather than 3. Tested on i.MX93 platform (U65) and FVP (U85) with Mesa Teflon support. Acked-by: Thomas Zimmermann Acked-by: Tomeu Vizoso Reviewed-by: Frank Li Link: https://patch.msgid.link/20251020-ethos-v6-2-ecebc383c4b7@kernel.org Signed-off-by: Rob Herring (Arm) --- MAINTAINERS | 9 + drivers/accel/Kconfig | 1 + drivers/accel/Makefile | 1 + drivers/accel/ethosu/Kconfig | 11 + drivers/accel/ethosu/Makefile | 4 + drivers/accel/ethosu/ethosu_device.h | 197 ++++++++++ drivers/accel/ethosu/ethosu_drv.c | 403 ++++++++++++++++++++ drivers/accel/ethosu/ethosu_drv.h | 15 + drivers/accel/ethosu/ethosu_gem.c | 704 +++++++++++++++++++++++++++++++++++ drivers/accel/ethosu/ethosu_gem.h | 46 +++ drivers/accel/ethosu/ethosu_job.c | 496 ++++++++++++++++++++++++ drivers/accel/ethosu/ethosu_job.h | 40 ++ include/uapi/drm/ethosu_accel.h | 261 +++++++++++++ 13 files changed, 2188 insertions(+) create mode 100644 drivers/accel/ethosu/Kconfig create mode 100644 drivers/accel/ethosu/Makefile create mode 100644 drivers/accel/ethosu/ethosu_device.h create mode 100644 drivers/accel/ethosu/ethosu_drv.c create mode 100644 drivers/accel/ethosu/ethosu_drv.h create mode 100644 drivers/accel/ethosu/ethosu_gem.c create mode 100644 drivers/accel/ethosu/ethosu_gem.h create mode 100644 drivers/accel/ethosu/ethosu_job.c create mode 100644 drivers/accel/ethosu/ethosu_job.h create mode 100644 include/uapi/drm/ethosu_accel.h (limited to 'include/uapi') diff --git a/MAINTAINERS 
b/MAINTAINERS index 5f53e385a261..874fcbe59990 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2017,6 +2017,15 @@ F: arch/arm64/include/asm/arch_timer.h F: drivers/clocksource/arm_arch_timer.c F: drivers/clocksource/arm_arch_timer_mmio.c +ARM ETHOS-U NPU DRIVER +M: Rob Herring (Arm) +M: Tomeu Vizoso +L: dri-devel@lists.freedesktop.org +S: Supported +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: drivers/accel/ethosu/ +F: include/uapi/drm/ethosu_accel.h + ARM GENERIC INTERRUPT CONTROLLER DRIVERS M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index bb01cebc42bf..bdf48ccafcf2 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -25,6 +25,7 @@ menuconfig DRM_ACCEL and debugfs). source "drivers/accel/amdxdna/Kconfig" +source "drivers/accel/ethosu/Kconfig" source "drivers/accel/habanalabs/Kconfig" source "drivers/accel/ivpu/Kconfig" source "drivers/accel/qaic/Kconfig" diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile index ffc3fa588666..1d3a7251b950 100644 --- a/drivers/accel/Makefile +++ b/drivers/accel/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/ +obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) += ethosu/ obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/ obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/ obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/ diff --git a/drivers/accel/ethosu/Kconfig b/drivers/accel/ethosu/Kconfig new file mode 100644 index 000000000000..d25f9b3eb317 --- /dev/null +++ b/drivers/accel/ethosu/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_ARM_ETHOSU + tristate "Arm Ethos-U65/U85 NPU" + depends on HAS_IOMEM + depends on DRM_ACCEL + select DRM_GEM_DMA_HELPER + select DRM_SCHED + select GENERIC_ALLOCATOR + help + Enables driver for Arm Ethos-U65/U85 NPUs diff --git a/drivers/accel/ethosu/Makefile b/drivers/accel/ethosu/Makefile new file mode 
100644 index 000000000000..17db5a600416 --- /dev/null +++ b/drivers/accel/ethosu/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) := ethosu.o +ethosu-y += ethosu_drv.o ethosu_gem.o ethosu_job.o diff --git a/drivers/accel/ethosu/ethosu_device.h b/drivers/accel/ethosu/ethosu_device.h new file mode 100644 index 000000000000..b189fa783d6a --- /dev/null +++ b/drivers/accel/ethosu/ethosu_device.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0-only or MIT */ +/* Copyright 2025 Arm, Ltd. */ + +#ifndef __ETHOSU_DEVICE_H__ +#define __ETHOSU_DEVICE_H__ + +#include +#include +#include + +#include +#include + +#include + +struct clk; +struct gen_pool; + +#define NPU_REG_ID 0x0000 +#define NPU_REG_STATUS 0x0004 +#define NPU_REG_CMD 0x0008 +#define NPU_REG_RESET 0x000c +#define NPU_REG_QBASE 0x0010 +#define NPU_REG_QBASE_HI 0x0014 +#define NPU_REG_QREAD 0x0018 +#define NPU_REG_QCONFIG 0x001c +#define NPU_REG_QSIZE 0x0020 +#define NPU_REG_PROT 0x0024 +#define NPU_REG_CONFIG 0x0028 +#define NPU_REG_REGIONCFG 0x003c +#define NPU_REG_AXILIMIT0 0x0040 // U65 +#define NPU_REG_AXILIMIT1 0x0044 // U65 +#define NPU_REG_AXILIMIT2 0x0048 // U65 +#define NPU_REG_AXILIMIT3 0x004c // U65 +#define NPU_REG_MEM_ATTR0 0x0040 // U85 +#define NPU_REG_MEM_ATTR1 0x0044 // U85 +#define NPU_REG_MEM_ATTR2 0x0048 // U85 +#define NPU_REG_MEM_ATTR3 0x004c // U85 +#define NPU_REG_AXI_SRAM 0x0050 // U85 +#define NPU_REG_AXI_EXT 0x0054 // U85 + +#define NPU_REG_BASEP(x) (0x0080 + (x) * 8) +#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8) +#define NPU_BASEP_REGION_MAX 8 + +#define ID_ARCH_MAJOR_MASK GENMASK(31, 28) +#define ID_ARCH_MINOR_MASK GENMASK(27, 20) +#define ID_ARCH_PATCH_MASK GENMASK(19, 16) +#define ID_VER_MAJOR_MASK GENMASK(11, 8) +#define ID_VER_MINOR_MASK GENMASK(7, 4) + +#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0) +#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4) + +#define STATUS_STATE_RUNNING BIT(0) +#define STATUS_IRQ_RAISED BIT(1) 
+#define STATUS_BUS_STATUS BIT(2) +#define STATUS_RESET_STATUS BIT(3) +#define STATUS_CMD_PARSE_ERR BIT(4) +#define STATUS_CMD_END_REACHED BIT(5) + +#define CMD_CLEAR_IRQ BIT(1) +#define CMD_TRANSITION_TO_RUN BIT(0) + +#define RESET_PENDING_CSL BIT(1) +#define RESET_PENDING_CPL BIT(0) + +#define PROT_ACTIVE_CSL BIT(1) + +enum ethosu_cmds { + NPU_OP_CONV = 0x2, + NPU_OP_DEPTHWISE = 0x3, + NPU_OP_POOL = 0x5, + NPU_OP_ELEMENTWISE = 0x6, + NPU_OP_RESIZE = 0x7, // U85 only + NPU_OP_DMA_START = 0x10, + NPU_SET_IFM_PAD_TOP = 0x100, + NPU_SET_IFM_PAD_LEFT = 0x101, + NPU_SET_IFM_PAD_RIGHT = 0x102, + NPU_SET_IFM_PAD_BOTTOM = 0x103, + NPU_SET_IFM_DEPTH_M1 = 0x104, + NPU_SET_IFM_PRECISION = 0x105, + NPU_SET_IFM_BROADCAST = 0x108, + NPU_SET_IFM_WIDTH0_M1 = 0x10a, + NPU_SET_IFM_HEIGHT0_M1 = 0x10b, + NPU_SET_IFM_HEIGHT1_M1 = 0x10c, + NPU_SET_IFM_REGION = 0x10f, + NPU_SET_OFM_WIDTH_M1 = 0x111, + NPU_SET_OFM_HEIGHT_M1 = 0x112, + NPU_SET_OFM_DEPTH_M1 = 0x113, + NPU_SET_OFM_PRECISION = 0x114, + NPU_SET_OFM_WIDTH0_M1 = 0x11a, + NPU_SET_OFM_HEIGHT0_M1 = 0x11b, + NPU_SET_OFM_HEIGHT1_M1 = 0x11c, + NPU_SET_OFM_REGION = 0x11f, + NPU_SET_KERNEL_WIDTH_M1 = 0x120, + NPU_SET_KERNEL_HEIGHT_M1 = 0x121, + NPU_SET_KERNEL_STRIDE = 0x122, + NPU_SET_WEIGHT_REGION = 0x128, + NPU_SET_SCALE_REGION = 0x129, + NPU_SET_DMA0_SRC_REGION = 0x130, + NPU_SET_DMA0_DST_REGION = 0x131, + NPU_SET_DMA0_SIZE0 = 0x132, + NPU_SET_DMA0_SIZE1 = 0x133, + NPU_SET_IFM2_BROADCAST = 0x180, + NPU_SET_IFM2_PRECISION = 0x185, + NPU_SET_IFM2_WIDTH0_M1 = 0x18a, + NPU_SET_IFM2_HEIGHT0_M1 = 0x18b, + NPU_SET_IFM2_HEIGHT1_M1 = 0x18c, + NPU_SET_IFM2_REGION = 0x18f, + NPU_SET_IFM_BASE0 = 0x4000, + NPU_SET_IFM_BASE1 = 0x4001, + NPU_SET_IFM_BASE2 = 0x4002, + NPU_SET_IFM_BASE3 = 0x4003, + NPU_SET_IFM_STRIDE_X = 0x4004, + NPU_SET_IFM_STRIDE_Y = 0x4005, + NPU_SET_IFM_STRIDE_C = 0x4006, + NPU_SET_OFM_BASE0 = 0x4010, + NPU_SET_OFM_BASE1 = 0x4011, + NPU_SET_OFM_BASE2 = 0x4012, + NPU_SET_OFM_BASE3 = 0x4013, + NPU_SET_OFM_STRIDE_X = 0x4014, + 
NPU_SET_OFM_STRIDE_Y = 0x4015, + NPU_SET_OFM_STRIDE_C = 0x4016, + NPU_SET_WEIGHT_BASE = 0x4020, + NPU_SET_WEIGHT_LENGTH = 0x4021, + NPU_SET_SCALE_BASE = 0x4022, + NPU_SET_SCALE_LENGTH = 0x4023, + NPU_SET_DMA0_SRC = 0x4030, + NPU_SET_DMA0_DST = 0x4031, + NPU_SET_DMA0_LEN = 0x4032, + NPU_SET_DMA0_SRC_STRIDE0 = 0x4033, + NPU_SET_DMA0_SRC_STRIDE1 = 0x4034, + NPU_SET_DMA0_DST_STRIDE0 = 0x4035, + NPU_SET_DMA0_DST_STRIDE1 = 0x4036, + NPU_SET_IFM2_BASE0 = 0x4080, + NPU_SET_IFM2_BASE1 = 0x4081, + NPU_SET_IFM2_BASE2 = 0x4082, + NPU_SET_IFM2_BASE3 = 0x4083, + NPU_SET_IFM2_STRIDE_X = 0x4084, + NPU_SET_IFM2_STRIDE_Y = 0x4085, + NPU_SET_IFM2_STRIDE_C = 0x4086, + NPU_SET_WEIGHT1_BASE = 0x4090, + NPU_SET_WEIGHT1_LENGTH = 0x4091, + NPU_SET_SCALE1_BASE = 0x4092, + NPU_SET_WEIGHT2_BASE = 0x4092, + NPU_SET_SCALE1_LENGTH = 0x4093, + NPU_SET_WEIGHT2_LENGTH = 0x4093, + NPU_SET_WEIGHT3_BASE = 0x4094, + NPU_SET_WEIGHT3_LENGTH = 0x4095, +}; + +#define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */ + +/** + * struct ethosu_device - Ethosu device + */ +struct ethosu_device { + /** @base: Base drm_device. */ + struct drm_device base; + + /** @regs: CPU mapping of the registers. 
*/ + void __iomem *regs; + + void __iomem *sram; + struct gen_pool *srampool; + dma_addr_t sramphys; + + struct clk_bulk_data *clks; + int num_clks; + int irq; + + struct drm_ethosu_npu_info npu_info; + + struct ethosu_job *in_flight_job; + /* For in_flight_job and ethosu_job_hw_submit() */ + struct mutex job_lock; + + /* For dma_fence */ + spinlock_t fence_lock; + + struct drm_gpu_scheduler sched; + /* For ethosu_job_do_push() */ + struct mutex sched_lock; + u64 fence_context; + u64 emit_seqno; +}; + +#define to_ethosu_device(drm_dev) \ + ((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base)) + +static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev) +{ + return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1; +} + +#endif diff --git a/drivers/accel/ethosu/ethosu_drv.c b/drivers/accel/ethosu/ethosu_drv.c new file mode 100644 index 000000000000..e05a69bf5574 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_drv.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0-only or MIT +// Copyright (C) 2025 Arm, Ltd. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ethosu_drv.h" +#include "ethosu_device.h" +#include "ethosu_gem.h" +#include "ethosu_job.h" + +static int ethosu_ioctl_dev_query(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct ethosu_device *ethosudev = to_ethosu_device(ddev); + struct drm_ethosu_dev_query *args = data; + + if (!args->pointer) { + switch (args->type) { + case DRM_ETHOSU_DEV_QUERY_NPU_INFO: + args->size = sizeof(ethosudev->npu_info); + return 0; + default: + return -EINVAL; + } + } + + switch (args->type) { + case DRM_ETHOSU_DEV_QUERY_NPU_INFO: + if (args->size < offsetofend(struct drm_ethosu_npu_info, sram_size)) + return -EINVAL; + return copy_struct_to_user(u64_to_user_ptr(args->pointer), + args->size, + &ethosudev->npu_info, + sizeof(ethosudev->npu_info), NULL); + default: + return -EINVAL; + } +} + +#define ETHOSU_BO_FLAGS DRM_ETHOSU_BO_NO_MMAP + +static int ethosu_ioctl_bo_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_create *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (!args->size || (args->flags & ~ETHOSU_BO_FLAGS)) { + ret = -EINVAL; + goto out_dev_exit; + } + + ret = ethosu_gem_create_with_handle(file, ddev, &args->size, + args->flags, &args->handle); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_ioctl_bo_wait(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_wait *args = data; + int cookie, ret; + unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + if (args->pad) + return -EINVAL; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + ret = drm_gem_dma_resv_wait(file, args->handle, true, timeout); + + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_ioctl_bo_mmap_offset(struct drm_device *ddev, 
void *data, + struct drm_file *file) +{ + struct drm_ethosu_bo_mmap_offset *args = data; + struct drm_gem_object *obj; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + args->offset = drm_vma_node_offset_addr(&obj->vma_node); + drm_gem_object_put(obj); + return 0; +} + +static int ethosu_ioctl_cmdstream_bo_create(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_ethosu_cmdstream_bo_create *args = data; + int cookie, ret; + + if (!drm_dev_enter(ddev, &cookie)) + return -ENODEV; + + if (!args->size || !args->data || args->pad || args->flags) { + ret = -EINVAL; + goto out_dev_exit; + } + + args->flags |= DRM_ETHOSU_BO_NO_MMAP; + + ret = ethosu_gem_cmdstream_create(file, ddev, args->size, args->data, + args->flags, &args->handle); + +out_dev_exit: + drm_dev_exit(cookie); + return ret; +} + +static int ethosu_open(struct drm_device *ddev, struct drm_file *file) +{ + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + struct ethosu_file_priv __free(kfree) *priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto err_put_mod; + } + priv->edev = to_ethosu_device(ddev); + + ret = ethosu_job_open(priv); + if (ret) + goto err_put_mod; + + file->driver_priv = no_free_ptr(priv); + return 0; + +err_put_mod: + module_put(THIS_MODULE); + return ret; +} + +static void ethosu_postclose(struct drm_device *ddev, struct drm_file *file) +{ + ethosu_job_close(file->driver_priv); + kfree(file->driver_priv); + module_put(THIS_MODULE); +} + +static const struct drm_ioctl_desc ethosu_drm_driver_ioctls[] = { +#define ETHOSU_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(ETHOSU_##n, ethosu_ioctl_##func, flags) + + ETHOSU_IOCTL(DEV_QUERY, dev_query, 0), + ETHOSU_IOCTL(BO_CREATE, bo_create, 0), + ETHOSU_IOCTL(BO_WAIT, bo_wait, 0), + ETHOSU_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, 0), + ETHOSU_IOCTL(CMDSTREAM_BO_CREATE, cmdstream_bo_create, 0), + 
ETHOSU_IOCTL(SUBMIT, submit, 0), +}; + +DEFINE_DRM_ACCEL_FOPS(ethosu_drm_driver_fops); + +/* + * Ethosu driver version: + * - 1.0 - initial interface + */ +static const struct drm_driver ethosu_drm_driver = { + .driver_features = DRIVER_COMPUTE_ACCEL | DRIVER_GEM, + .open = ethosu_open, + .postclose = ethosu_postclose, + .ioctls = ethosu_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(ethosu_drm_driver_ioctls), + .fops = &ethosu_drm_driver_fops, + .name = "ethosu", + .desc = "Arm Ethos-U Accel driver", + .major = 1, + .minor = 0, + + .gem_create_object = ethosu_gem_create_object, +}; + +#define U65_DRAM_AXI_LIMIT_CFG 0x1f3f0002 +#define U65_SRAM_AXI_LIMIT_CFG 0x1f3f00b0 +#define U85_AXI_EXT_CFG 0x00021f3f +#define U85_AXI_SRAM_CFG 0x00021f3f +#define U85_MEM_ATTR0_CFG 0x00000000 +#define U85_MEM_ATTR2_CFG 0x000000b7 + +static int ethosu_reset(struct ethosu_device *ethosudev) +{ + int ret; + u32 reg; + + writel_relaxed(RESET_PENDING_CSL, ethosudev->regs + NPU_REG_RESET); + ret = readl_poll_timeout(ethosudev->regs + NPU_REG_STATUS, reg, + !FIELD_GET(STATUS_RESET_STATUS, reg), + USEC_PER_MSEC, USEC_PER_SEC); + if (ret) + return ret; + + if (!FIELD_GET(PROT_ACTIVE_CSL, readl_relaxed(ethosudev->regs + NPU_REG_PROT))) { + dev_warn(ethosudev->base.dev, "Could not reset to non-secure mode (PROT = %x)\n", + readl_relaxed(ethosudev->regs + NPU_REG_PROT)); + } + + /* + * Assign region 2 (SRAM) to AXI M0 (AXILIMIT0), + * everything else to AXI M1 (AXILIMIT2) + */ + writel_relaxed(0x0000aa8a, ethosudev->regs + NPU_REG_REGIONCFG); + if (ethosu_is_u65(ethosudev)) { + writel_relaxed(U65_SRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT0); + writel_relaxed(U65_DRAM_AXI_LIMIT_CFG, ethosudev->regs + NPU_REG_AXILIMIT2); + } else { + writel_relaxed(U85_AXI_SRAM_CFG, ethosudev->regs + NPU_REG_AXI_SRAM); + writel_relaxed(U85_AXI_EXT_CFG, ethosudev->regs + NPU_REG_AXI_EXT); + writel_relaxed(U85_MEM_ATTR0_CFG, ethosudev->regs + NPU_REG_MEM_ATTR0); // SRAM + 
writel_relaxed(U85_MEM_ATTR2_CFG, ethosudev->regs + NPU_REG_MEM_ATTR2); // DRAM + } + + if (ethosudev->sram) + memset_io(ethosudev->sram, 0, ethosudev->npu_info.sram_size); + + return 0; +} + +static int ethosu_device_resume(struct device *dev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(dev); + int ret; + + ret = clk_bulk_prepare_enable(ethosudev->num_clks, ethosudev->clks); + if (ret) + return ret; + + ret = ethosu_reset(ethosudev); + if (!ret) + return 0; + + clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks); + return ret; +} + +static int ethosu_device_suspend(struct device *dev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(dev); + + clk_bulk_disable_unprepare(ethosudev->num_clks, ethosudev->clks); + return 0; +} + +static int ethosu_sram_init(struct ethosu_device *ethosudev) +{ + ethosudev->npu_info.sram_size = 0; + + ethosudev->srampool = of_gen_pool_get(ethosudev->base.dev->of_node, "sram", 0); + if (!ethosudev->srampool) + return 0; + + ethosudev->npu_info.sram_size = gen_pool_size(ethosudev->srampool); + + ethosudev->sram = (void __iomem *)gen_pool_dma_alloc(ethosudev->srampool, + ethosudev->npu_info.sram_size, + &ethosudev->sramphys); + if (!ethosudev->sram) { + dev_err(ethosudev->base.dev, "failed to allocate from SRAM pool\n"); + return -ENOMEM; + } + + return 0; +} + +static int ethosu_init(struct ethosu_device *ethosudev) +{ + int ret; + u32 id, config; + + ret = ethosu_device_resume(ethosudev->base.dev); + if (ret) + return ret; + + pm_runtime_set_autosuspend_delay(ethosudev->base.dev, 50); + pm_runtime_use_autosuspend(ethosudev->base.dev); + ret = devm_pm_runtime_set_active_enabled(ethosudev->base.dev); + if (ret) + return ret; + pm_runtime_get_noresume(ethosudev->base.dev); + + ethosudev->npu_info.id = id = readl_relaxed(ethosudev->regs + NPU_REG_ID); + ethosudev->npu_info.config = config = readl_relaxed(ethosudev->regs + NPU_REG_CONFIG); + + ethosu_sram_init(ethosudev); + + dev_info(ethosudev->base.dev, + "Ethos-U 
NPU, arch v%ld.%ld.%ld, rev r%ldp%ld, cmd stream ver%ld, %d MACs, %dKB SRAM\n", + FIELD_GET(ID_ARCH_MAJOR_MASK, id), + FIELD_GET(ID_ARCH_MINOR_MASK, id), + FIELD_GET(ID_ARCH_PATCH_MASK, id), + FIELD_GET(ID_VER_MAJOR_MASK, id), + FIELD_GET(ID_VER_MINOR_MASK, id), + FIELD_GET(CONFIG_CMD_STREAM_VER_MASK, config), + 1 << FIELD_GET(CONFIG_MACS_PER_CC_MASK, config), + ethosudev->npu_info.sram_size / 1024); + + return 0; +} + +static int ethosu_probe(struct platform_device *pdev) +{ + int ret; + struct ethosu_device *ethosudev; + + ethosudev = devm_drm_dev_alloc(&pdev->dev, &ethosu_drm_driver, + struct ethosu_device, base); + if (IS_ERR(ethosudev)) + return -ENOMEM; + platform_set_drvdata(pdev, ethosudev); + + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + + ethosudev->regs = devm_platform_ioremap_resource(pdev, 0); + + ethosudev->num_clks = devm_clk_bulk_get_all(&pdev->dev, &ethosudev->clks); + if (ethosudev->num_clks < 0) + return ethosudev->num_clks; + + ret = ethosu_job_init(ethosudev); + if (ret) + return ret; + + ret = ethosu_init(ethosudev); + if (ret) + return ret; + + ret = drm_dev_register(&ethosudev->base, 0); + if (ret) + pm_runtime_dont_use_autosuspend(ethosudev->base.dev); + + pm_runtime_put_autosuspend(ethosudev->base.dev); + return ret; +} + +static void ethosu_remove(struct platform_device *pdev) +{ + struct ethosu_device *ethosudev = dev_get_drvdata(&pdev->dev); + + drm_dev_unregister(&ethosudev->base); + ethosu_job_fini(ethosudev); + if (ethosudev->sram) + gen_pool_free(ethosudev->srampool, (unsigned long)ethosudev->sram, + ethosudev->npu_info.sram_size); +} + +static const struct of_device_id dt_match[] = { + { .compatible = "arm,ethos-u65" }, + { .compatible = "arm,ethos-u85" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static DEFINE_RUNTIME_DEV_PM_OPS(ethosu_pm_ops, + ethosu_device_suspend, + ethosu_device_resume, + NULL); + +static struct platform_driver ethosu_driver = { + .probe = ethosu_probe, + .remove = ethosu_remove, + .driver = { + 
.name = "ethosu", + .pm = pm_ptr(&ethosu_pm_ops), + .of_match_table = dt_match, + }, +}; +module_platform_driver(ethosu_driver); + +MODULE_AUTHOR("Rob Herring "); +MODULE_DESCRIPTION("Arm Ethos-U Accel Driver"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/accel/ethosu/ethosu_drv.h b/drivers/accel/ethosu/ethosu_drv.h new file mode 100644 index 000000000000..9e21dfe94184 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_drv.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright 2025 Arm, Ltd. */ +#ifndef __ETHOSU_DRV_H__ +#define __ETHOSU_DRV_H__ + +#include + +struct ethosu_device; + +struct ethosu_file_priv { + struct ethosu_device *edev; + struct drm_sched_entity sched_entity; +}; + +#endif diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c new file mode 100644 index 000000000000..473b5f5d7514 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -0,0 +1,704 @@ +// SPDX-License-Identifier: GPL-2.0-only or MIT +/* Copyright 2025 Arm, Ltd. */ + +#include +#include + +#include + +#include "ethosu_device.h" +#include "ethosu_gem.h" + +static void ethosu_gem_free_object(struct drm_gem_object *obj) +{ + struct ethosu_gem_object *bo = to_ethosu_bo(obj); + + kfree(bo->info); + drm_gem_free_mmap_offset(&bo->base.base); + drm_gem_dma_free(&bo->base); +} + +static int ethosu_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct ethosu_gem_object *bo = to_ethosu_bo(obj); + + /* Don't allow mmap on objects that have the NO_MMAP flag set. 
*/ + if (bo->flags & DRM_ETHOSU_BO_NO_MMAP) + return -EINVAL; + + return drm_gem_dma_object_mmap(obj, vma); +} + +static const struct drm_gem_object_funcs ethosu_gem_funcs = { + .free = ethosu_gem_free_object, + .print_info = drm_gem_dma_object_print_info, + .get_sg_table = drm_gem_dma_object_get_sg_table, + .vmap = drm_gem_dma_object_vmap, + .mmap = ethosu_gem_mmap, + .vm_ops = &drm_gem_dma_vm_ops, +}; + +/** + * ethosu_gem_create_object - Implementation of driver->gem_create_object. + * @ddev: DRM device + * @size: Size in bytes of the memory the object will reference + * + * This lets the GEM helpers allocate object structs for us, and keep + * our BO stats correct. + */ +struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, size_t size) +{ + struct ethosu_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + obj->base.base.funcs = &ethosu_gem_funcs; + return &obj->base.base; +} + +/** + * ethosu_gem_create_with_handle() - Create a GEM object and attach it to a handle. + * @file: DRM file. + * @ddev: DRM device. + * @size: Size of the GEM object to allocate. + * @flags: Combination of drm_ethosu_bo_flags flags. + * @handle: Pointer holding the handle pointing to the new GEM object. + * + * Return: Zero on success + */ +int ethosu_gem_create_with_handle(struct drm_file *file, + struct drm_device *ddev, + u64 *size, u32 flags, u32 *handle) +{ + struct drm_gem_dma_object *mem; + struct ethosu_gem_object *bo; + int ret; + + mem = drm_gem_dma_create(ddev, *size); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + bo = to_ethosu_bo(&mem->base); + bo->flags = flags; + + /* + * Allocate an id of idr table where the obj is registered + * and handle has the id what user can see. + */ + ret = drm_gem_handle_create(file, &mem->base, handle); + if (!ret) + *size = bo->base.base.size; + + /* drop reference from allocate - handle holds it now. 
*/ + drm_gem_object_put(&mem->base); + + return ret; +} + +struct dma { + s8 region; + u64 len; + u64 offset; + s64 stride[2]; +}; + +struct dma_state { + u16 size0; + u16 size1; + s8 mode; + struct dma src; + struct dma dst; +}; + +struct buffer { + u64 base; + u32 length; + s8 region; +}; + +struct feat_matrix { + u64 base[4]; + s64 stride_x; + s64 stride_y; + s64 stride_c; + s8 region; + u8 broadcast; + u16 stride_kernel; + u16 precision; + u16 depth; + u16 width; + u16 width0; + u16 height[3]; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; +}; + +struct cmd_state { + struct dma_state dma; + struct buffer scale[2]; + struct buffer weight[4]; + struct feat_matrix ofm; + struct feat_matrix ifm; + struct feat_matrix ifm2; +}; + +static void cmd_state_init(struct cmd_state *st) +{ + /* Initialize to all 1s to detect missing setup */ + memset(st, 0xff, sizeof(*st)); +} + +static u64 cmd_to_addr(u32 *cmd) +{ + return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1]; +} + +static u64 dma_length(struct ethosu_validated_cmdstream_info *info, + struct dma_state *dma_st, struct dma *dma) +{ + s8 mode = dma_st->mode; + u64 len = dma->len; + + if (mode >= 1) { + len += dma->stride[0]; + len *= dma_st->size0; + } + if (mode == 2) { + len += dma->stride[1]; + len *= dma_st->size1; + } + if (dma->region >= 0) + info->region_size[dma->region] = max(info->region_size[dma->region], + len + dma->offset); + + return len; +} + +static u64 feat_matrix_length(struct ethosu_validated_cmdstream_info *info, + struct feat_matrix *fm, + u32 x, u32 y, u32 c) +{ + u32 element_size, storage = fm->precision >> 14; + int tile = 0; + u64 addr; + + if (fm->region < 0) + return U64_MAX; + + switch (storage) { + case 0: + if (x >= fm->width0 + 1) { + x -= fm->width0 + 1; + tile += 1; + } + if (y >= fm->height[tile] + 1) { + y -= fm->height[tile] + 1; + tile += 2; + } + break; + case 1: + if (y >= fm->height[1] + 1) { + y -= fm->height[1] + 1; + tile = 2; + } else if (y >= fm->height[0] + 
1) { + y -= fm->height[0] + 1; + tile = 1; + } + break; + } + if (fm->base[tile] == U64_MAX) + return U64_MAX; + + addr = fm->base[tile] + y * fm->stride_y; + + switch ((fm->precision >> 6) & 0x3) { // format + case 0: //nhwc: + addr += x * fm->stride_x + c; + break; + case 1: //nhcwb16: + element_size = BIT((fm->precision >> 1) & 0x3); + + addr += (c / 16) * fm->stride_c + (16 * x + (c & 0xf)) * element_size; + break; + } + + info->region_size[fm->region] = max(info->region_size[fm->region], addr + 1); + + return addr; +} + +static int calc_sizes(struct drm_device *ddev, + struct ethosu_validated_cmdstream_info *info, + u16 op, struct cmd_state *st, + bool ifm, bool ifm2, bool weight, bool scale) +{ + u64 len; + + if (ifm) { + if (st->ifm.stride_kernel == U16_MAX) + return -EINVAL; + u32 stride_y = ((st->ifm.stride_kernel >> 8) & 0x2) + + ((st->ifm.stride_kernel >> 1) & 0x1) + 1; + u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) + + (st->ifm.stride_kernel & 0x1) + 1; + u32 ifm_height = st->ofm.height[2] * stride_y + + st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom); + u32 ifm_width = st->ofm.width * stride_x + + st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right); + + len = feat_matrix_length(info, &st->ifm, ifm_width, + ifm_height, st->ifm.depth); + dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", + op, st->ifm.region, st->ifm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (ifm2) { + len = feat_matrix_length(info, &st->ifm2, st->ifm.depth, + 0, st->ofm.depth); + dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n", + op, st->ifm2.region, st->ifm2.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (weight) { + dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n", + op, st->weight[0].region, st->weight[0].base, + st->weight[0].base + st->weight[0].length - 1); + if (st->weight[0].region < 0 || st->weight[0].base == U64_MAX || + st->weight[0].length == U32_MAX) + return -EINVAL; + 
info->region_size[st->weight[0].region] = + max(info->region_size[st->weight[0].region], + st->weight[0].base + st->weight[0].length); + } + + if (scale) { + dev_dbg(ddev->dev, "op %d: S:%d:0x%llx-0x%llx\n", + op, st->scale[0].region, st->scale[0].base, + st->scale[0].base + st->scale[0].length - 1); + if (st->scale[0].region < 0 || st->scale[0].base == U64_MAX || + st->scale[0].length == U32_MAX) + return -EINVAL; + info->region_size[st->scale[0].region] = + max(info->region_size[st->scale[0].region], + st->scale[0].base + st->scale[0].length); + } + + len = feat_matrix_length(info, &st->ofm, st->ofm.width, + st->ofm.height[2], st->ofm.depth); + dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n", + op, st->ofm.region, st->ofm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + info->output_region[st->ofm.region] = true; + + return 0; +} + +static int calc_sizes_elemwise(struct drm_device *ddev, + struct ethosu_validated_cmdstream_info *info, + u16 op, struct cmd_state *st, + bool ifm, bool ifm2) +{ + u32 height, width, depth; + u64 len; + + if (ifm) { + height = st->ifm.broadcast & 0x1 ? 0 : st->ofm.height[2]; + width = st->ifm.broadcast & 0x2 ? 0 : st->ofm.width; + depth = st->ifm.broadcast & 0x4 ? 0 : st->ofm.depth; + + len = feat_matrix_length(info, &st->ifm, width, + height, depth); + dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", + op, st->ifm.region, st->ifm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + if (ifm2) { + height = st->ifm2.broadcast & 0x1 ? 0 : st->ofm.height[2]; + width = st->ifm2.broadcast & 0x2 ? 0 : st->ofm.width; + depth = st->ifm2.broadcast & 0x4 ? 
0 : st->ofm.depth; + + len = feat_matrix_length(info, &st->ifm2, width, + height, depth); + dev_dbg(ddev->dev, "op %d: IFM2:%d:0x%llx-0x%llx\n", + op, st->ifm2.region, st->ifm2.base[0], len); + if (len == U64_MAX) + return -EINVAL; + } + + len = feat_matrix_length(info, &st->ofm, st->ofm.width, + st->ofm.height[2], st->ofm.depth); + dev_dbg(ddev->dev, "op %d: OFM:%d:0x%llx-0x%llx\n", + op, st->ofm.region, st->ofm.base[0], len); + if (len == U64_MAX) + return -EINVAL; + info->output_region[st->ofm.region] = true; + + return 0; +} + +static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev, + u32 __user *ucmds, + struct ethosu_gem_object *bo, + u32 size) +{ + struct ethosu_validated_cmdstream_info __free(kfree) *info = kzalloc(sizeof(*info), GFP_KERNEL); + struct ethosu_device *edev = to_ethosu_device(ddev); + u32 *bocmds = bo->base.vaddr; + struct cmd_state st; + int i, ret; + + if (!info) + return -ENOMEM; + info->cmd_size = size; + + cmd_state_init(&st); + + for (i = 0; i < size / 4; i++) { + bool use_ifm, use_ifm2, use_scale; + u64 dstlen, srclen; + u16 cmd, param; + u32 cmds[2]; + u64 addr; + + if (get_user(cmds[0], ucmds++)) + return -EFAULT; + + bocmds[i] = cmds[0]; + + cmd = cmds[0]; + param = cmds[0] >> 16; + + if (cmd & 0x4000) { + if (get_user(cmds[1], ucmds++)) + return -EFAULT; + + i++; + bocmds[i] = cmds[1]; + addr = cmd_to_addr(cmds); + } + + switch (cmd) { + case NPU_OP_DMA_START: + srclen = dma_length(info, &st.dma, &st.dma.src); + dstlen = dma_length(info, &st.dma, &st.dma.dst); + + if (st.dma.dst.region >= 0) + info->output_region[st.dma.dst.region] = true; + dev_dbg(ddev->dev, "cmd: DMA SRC:%d:0x%llx+0x%llx DST:%d:0x%llx+0x%llx\n", + st.dma.src.region, st.dma.src.offset, srclen, + st.dma.dst.region, st.dma.dst.offset, dstlen); + break; + case NPU_OP_CONV: + case NPU_OP_DEPTHWISE: + use_ifm2 = param & 0x1; // weights_ifm2 + use_scale = !(st.ofm.precision & 0x100); + ret = calc_sizes(ddev, info, cmd, &st, true, use_ifm2, + 
!use_ifm2, use_scale); + if (ret) + return ret; + break; + case NPU_OP_POOL: + use_ifm = param != 0x4; // pooling mode + use_scale = !(st.ofm.precision & 0x100); + ret = calc_sizes(ddev, info, cmd, &st, use_ifm, false, + false, use_scale); + if (ret) + return ret; + break; + case NPU_OP_ELEMENTWISE: + use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) || + (param == 6) || (param == 7) || (param == 0x24)); + use_ifm = st.ifm.broadcast != 8; + ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2); + if (ret) + return ret; + break; + case NPU_OP_RESIZE: // U85 only + WARN_ON(1); // TODO + break; + case NPU_SET_KERNEL_WIDTH_M1: + st.ifm.width = param; + break; + case NPU_SET_KERNEL_HEIGHT_M1: + st.ifm.height[2] = param; + break; + case NPU_SET_KERNEL_STRIDE: + st.ifm.stride_kernel = param; + break; + case NPU_SET_IFM_PAD_TOP: + st.ifm.pad_top = param & 0x7f; + break; + case NPU_SET_IFM_PAD_LEFT: + st.ifm.pad_left = param & 0x7f; + break; + case NPU_SET_IFM_PAD_RIGHT: + st.ifm.pad_right = param & 0xff; + break; + case NPU_SET_IFM_PAD_BOTTOM: + st.ifm.pad_bottom = param & 0xff; + break; + case NPU_SET_IFM_DEPTH_M1: + st.ifm.depth = param; + break; + case NPU_SET_IFM_PRECISION: + st.ifm.precision = param; + break; + case NPU_SET_IFM_BROADCAST: + st.ifm.broadcast = param; + break; + case NPU_SET_IFM_REGION: + /* Same 3-bit region mask as OFM/IFM2; 0x7f let user input index far past info->region_size[] */ + st.ifm.region = param & 0x7; + break; + case NPU_SET_IFM_WIDTH0_M1: + st.ifm.width0 = param; + break; + case NPU_SET_IFM_HEIGHT0_M1: + st.ifm.height[0] = param; + break; + case NPU_SET_IFM_HEIGHT1_M1: + st.ifm.height[1] = param; + break; + case NPU_SET_IFM_BASE0: + case NPU_SET_IFM_BASE1: + case NPU_SET_IFM_BASE2: + case NPU_SET_IFM_BASE3: + st.ifm.base[cmd & 0x3] = addr; + break; + case NPU_SET_IFM_STRIDE_X: + st.ifm.stride_x = addr; + break; + case NPU_SET_IFM_STRIDE_Y: + st.ifm.stride_y = addr; + break; + case NPU_SET_IFM_STRIDE_C: + st.ifm.stride_c = addr; + break; + + case NPU_SET_OFM_WIDTH_M1: + st.ofm.width = param; + break; + case
NPU_SET_OFM_HEIGHT_M1: + st.ofm.height[2] = param; + break; + case NPU_SET_OFM_DEPTH_M1: + st.ofm.depth = param; + break; + case NPU_SET_OFM_PRECISION: + st.ofm.precision = param; + break; + case NPU_SET_OFM_REGION: + st.ofm.region = param & 0x7; + break; + case NPU_SET_OFM_WIDTH0_M1: + st.ofm.width0 = param; + break; + case NPU_SET_OFM_HEIGHT0_M1: + st.ofm.height[0] = param; + break; + case NPU_SET_OFM_HEIGHT1_M1: + st.ofm.height[1] = param; + break; + case NPU_SET_OFM_BASE0: + case NPU_SET_OFM_BASE1: + case NPU_SET_OFM_BASE2: + case NPU_SET_OFM_BASE3: + st.ofm.base[cmd & 0x3] = addr; + break; + case NPU_SET_OFM_STRIDE_X: + st.ofm.stride_x = addr; + break; + case NPU_SET_OFM_STRIDE_Y: + st.ofm.stride_y = addr; + break; + case NPU_SET_OFM_STRIDE_C: + st.ofm.stride_c = addr; + break; + + case NPU_SET_IFM2_BROADCAST: + st.ifm2.broadcast = param; + break; + case NPU_SET_IFM2_PRECISION: + st.ifm2.precision = param; + break; + case NPU_SET_IFM2_REGION: + st.ifm2.region = param & 0x7; + break; + case NPU_SET_IFM2_WIDTH0_M1: + st.ifm2.width0 = param; + break; + case NPU_SET_IFM2_HEIGHT0_M1: + st.ifm2.height[0] = param; + break; + case NPU_SET_IFM2_HEIGHT1_M1: + st.ifm2.height[1] = param; + break; + case NPU_SET_IFM2_BASE0: + case NPU_SET_IFM2_BASE1: + case NPU_SET_IFM2_BASE2: + case NPU_SET_IFM2_BASE3: + st.ifm2.base[cmd & 0x3] = addr; + break; + case NPU_SET_IFM2_STRIDE_X: + st.ifm2.stride_x = addr; + break; + case NPU_SET_IFM2_STRIDE_Y: + st.ifm2.stride_y = addr; + break; + case NPU_SET_IFM2_STRIDE_C: + st.ifm2.stride_c = addr; + break; + + case NPU_SET_WEIGHT_REGION: + st.weight[0].region = param & 0x7; + break; + case NPU_SET_SCALE_REGION: + st.scale[0].region = param & 0x7; + break; + case NPU_SET_WEIGHT_BASE: + st.weight[0].base = addr; + break; + case NPU_SET_WEIGHT_LENGTH: + st.weight[0].length = cmds[1]; + break; + case NPU_SET_SCALE_BASE: + st.scale[0].base = addr; + break; + case NPU_SET_SCALE_LENGTH: + st.scale[0].length = cmds[1]; + break; + case 
NPU_SET_WEIGHT1_BASE: + st.weight[1].base = addr; + break; + case NPU_SET_WEIGHT1_LENGTH: + st.weight[1].length = cmds[1]; + break; + case NPU_SET_SCALE1_BASE: // NPU_SET_WEIGHT2_BASE (U85) + if (ethosu_is_u65(edev)) + st.scale[1].base = addr; + else + st.weight[2].base = addr; + break; + case NPU_SET_SCALE1_LENGTH: // NPU_SET_WEIGHT2_LENGTH (U85) + if (ethosu_is_u65(edev)) + st.scale[1].length = cmds[1]; + else + /* On U85 this opcode is WEIGHT2_LENGTH: pair it with weight[2].base, not weight[1] */ + st.weight[2].length = cmds[1]; + break; + case NPU_SET_WEIGHT3_BASE: + st.weight[3].base = addr; + break; + case NPU_SET_WEIGHT3_LENGTH: + st.weight[3].length = cmds[1]; + break; + + case NPU_SET_DMA0_SRC_REGION: + if (param & 0x100) + st.dma.src.region = -1; + else + st.dma.src.region = param & 0x7; + st.dma.mode = (param >> 9) & 0x3; + break; + case NPU_SET_DMA0_DST_REGION: + if (param & 0x100) + st.dma.dst.region = -1; + else + st.dma.dst.region = param & 0x7; + break; + case NPU_SET_DMA0_SIZE0: + st.dma.size0 = param; + break; + case NPU_SET_DMA0_SIZE1: + st.dma.size1 = param; + break; + case NPU_SET_DMA0_SRC_STRIDE0: + st.dma.src.stride[0] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_SRC_STRIDE1: + st.dma.src.stride[1] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_DST_STRIDE0: + st.dma.dst.stride[0] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_DST_STRIDE1: + st.dma.dst.stride[1] = ((s64)addr << 24) >> 24; + break; + case NPU_SET_DMA0_SRC: + st.dma.src.offset = addr; + break; + case NPU_SET_DMA0_DST: + st.dma.dst.offset = addr; + break; + case NPU_SET_DMA0_LEN: + st.dma.src.len = st.dma.dst.len = addr; + break; + default: + break; + } + } + + for (i = 0; i < NPU_BASEP_REGION_MAX; i++) { + if (!info->region_size[i]) + continue; + dev_dbg(ddev->dev, "region %d max size: 0x%llx\n", + i, info->region_size[i]); + } + + bo->info = no_free_ptr(info); + return 0; +} + +/** + * ethosu_gem_cmdstream_create() - Create a GEM object and attach it to a handle. + * @file: DRM file. + * @ddev: DRM device.
+ * @size: Size of the GEM object to allocate. + * @data: User-space address of the command stream that is copied into the BO and validated. + * @flags: Combination of drm_ethosu_bo_flags flags. + * @handle: Pointer holding the handle pointing to the new GEM object. + * + * Return: Zero on success, an error code from allocation, validation or handle creation otherwise. + */ +int ethosu_gem_cmdstream_create(struct drm_file *file, + struct drm_device *ddev, + u32 size, u64 data, u32 flags, u32 *handle) +{ + int ret; + struct drm_gem_dma_object *mem; + struct ethosu_gem_object *bo; + + mem = drm_gem_dma_create(ddev, size); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + bo = to_ethosu_bo(&mem->base); + bo->flags = flags; + + ret = ethosu_gem_cmdstream_copy_and_validate(ddev, + (void __user *)(uintptr_t)data, + bo, size); + if (ret) + goto fail; + + /* + * Allocate an id of idr table where the obj is registered + * and handle has the id what user can see. + */ + ret = drm_gem_handle_create(file, &mem->base, handle); + +fail: + /* Drop the allocation reference: on success the handle holds its own, on failure this releases the object. */ + drm_gem_object_put(&mem->base); + + return ret; +} diff --git a/drivers/accel/ethosu/ethosu_gem.h b/drivers/accel/ethosu/ethosu_gem.h new file mode 100644 index 000000000000..3922895a60fb --- /dev/null +++ b/drivers/accel/ethosu/ethosu_gem.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 Arm, Ltd. */ + +#ifndef __ETHOSU_GEM_H__ +#define __ETHOSU_GEM_H__ + +#include "ethosu_device.h" +#include + +struct ethosu_validated_cmdstream_info { + u32 cmd_size; + u64 region_size[NPU_BASEP_REGION_MAX]; + bool output_region[NPU_BASEP_REGION_MAX]; +}; + +/** + * struct ethosu_gem_object - Driver specific GEM object. + */ +struct ethosu_gem_object { + /** @base: Inherit from drm_gem_dma_object. */ + struct drm_gem_dma_object base; + + /** @info: Command stream validation results; job submission treats a BO with non-NULL info as a command stream BO. */ + struct ethosu_validated_cmdstream_info *info; + + /** @flags: Combination of drm_ethosu_bo_flags flags.
*/ + u32 flags; +}; + +static inline +struct ethosu_gem_object *to_ethosu_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_dma_obj(obj), struct ethosu_gem_object, base); +} + +struct drm_gem_object *ethosu_gem_create_object(struct drm_device *ddev, + size_t size); + +int ethosu_gem_create_with_handle(struct drm_file *file, + struct drm_device *ddev, + u64 *size, u32 flags, uint32_t *handle); + +int ethosu_gem_cmdstream_create(struct drm_file *file, + struct drm_device *ddev, + u32 size, u64 data, u32 flags, u32 *handle); + +#endif /* __ETHOSU_GEM_H__ */ diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c new file mode 100644 index 000000000000..32b89cbfbaad --- /dev/null +++ b/drivers/accel/ethosu/ethosu_job.c @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* Copyright 2024-2025 Tomeu Vizoso */ +/* Copyright 2025 Arm, Ltd. */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ethosu_device.h" +#include "ethosu_drv.h" +#include "ethosu_gem.h" +#include "ethosu_job.h" + +#define JOB_TIMEOUT_MS 500 + +static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct ethosu_job, base); +} + +static const char *ethosu_fence_get_driver_name(struct dma_fence *fence) +{ + return "ethosu"; +} + +static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence) +{ + return "ethosu-npu"; +} + +static const struct dma_fence_ops ethosu_fence_ops = { + .get_driver_name = ethosu_fence_get_driver_name, + .get_timeline_name = ethosu_fence_get_timeline_name, +}; + +static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job) +{ + struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo); + struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info; + + for (int i = 0; i < job->region_cnt; i++) { + struct drm_gem_dma_object *bo; + int 
region = job->region_bo_num[i]; + + bo = to_drm_gem_dma_obj(job->region_bo[i]); + writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region)); + writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region)); + dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr); + } + + if (job->sram_size) { + writel_relaxed(lower_32_bits(dev->sramphys), + dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION)); + writel_relaxed(upper_32_bits(dev->sramphys), + dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION)); + dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n", + ETHOSU_SRAM_REGION, &dev->sramphys); + } + + writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE); + writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI); + writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE); + + writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD); + + dev_dbg(dev->base.dev, + "Submitted cmd at %pad to core\n", &cmd_bo->dma_addr); +} + +static int ethosu_acquire_object_fences(struct ethosu_job *job) +{ + int i, ret; + struct drm_gem_object **bos = job->region_bo; + struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; + + for (i = 0; i < job->region_cnt; i++) { + bool is_write; + + if (!bos[i]) + break; + + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + + is_write = info->output_region[job->region_bo_num[i]]; + ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i], + is_write); + if (ret) + return ret; + } + + return 0; +} + +static void ethosu_attach_object_fences(struct ethosu_job *job) +{ + int i; + struct dma_fence *fence = job->inference_done_fence; + struct drm_gem_object **bos = job->region_bo; + struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info; + + for (i = 0; i < job->region_cnt; i++) + if (info->output_region[job->region_bo_num[i]]) + dma_resv_add_fence(bos[i]->resv, fence, 
DMA_RESV_USAGE_WRITE); +} + +static int ethosu_job_push(struct ethosu_job *job) +{ + struct ww_acquire_ctx acquire_ctx; + int ret; + + ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); + if (ret) + return ret; + + ret = ethosu_acquire_object_fences(job); + if (ret) + goto out; + + ret = pm_runtime_resume_and_get(job->dev->base.dev); + if (!ret) { + guard(mutex)(&job->dev->sched_lock); + + drm_sched_job_arm(&job->base); + job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished); + kref_get(&job->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&job->base); + ethosu_attach_object_fences(job); + } + +out: + drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx); + return ret; +} + +static void ethosu_job_cleanup(struct kref *ref) +{ + struct ethosu_job *job = container_of(ref, struct ethosu_job, + refcount); + unsigned int i; + + pm_runtime_put_autosuspend(job->dev->base.dev); + + dma_fence_put(job->done_fence); + dma_fence_put(job->inference_done_fence); + + for (i = 0; i < job->region_cnt; i++) + drm_gem_object_put(job->region_bo[i]); + + drm_gem_object_put(job->cmd_bo); + + kfree(job); +} + +static void ethosu_job_put(struct ethosu_job *job) +{ + kref_put(&job->refcount, ethosu_job_cleanup); +} + +static void ethosu_job_free(struct drm_sched_job *sched_job) +{ + struct ethosu_job *job = to_ethosu_job(sched_job); + + drm_sched_job_cleanup(sched_job); + ethosu_job_put(job); +} + +static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job) +{ + struct ethosu_job *job = to_ethosu_job(sched_job); + struct ethosu_device *dev = job->dev; + struct dma_fence *fence = job->done_fence; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + dma_fence_init(fence, ðosu_fence_ops, &dev->fence_lock, + dev->fence_context, ++dev->emit_seqno); + dma_fence_get(fence); + + scoped_guard(mutex, &dev->job_lock) { + dev->in_flight_job = job; + ethosu_job_hw_submit(dev, 
job); + } + + return fence; +} + +static void ethosu_job_handle_irq(struct ethosu_device *dev) +{ + u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); + + if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) { + dev_err(dev->base.dev, "Error IRQ - %x\n", status); + drm_sched_fault(&dev->sched); + return; + } + + scoped_guard(mutex, &dev->job_lock) { + if (dev->in_flight_job) { + dma_fence_signal(dev->in_flight_job->done_fence); + dev->in_flight_job = NULL; + } + } +} + +static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data) +{ + struct ethosu_device *dev = data; + + ethosu_job_handle_irq(dev); + + return IRQ_HANDLED; +} + +static irqreturn_t ethosu_job_irq_handler(int irq, void *data) +{ + struct ethosu_device *dev = data; + u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS); + + if (!(status & STATUS_IRQ_RAISED)) + return IRQ_NONE; + + writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD); + return IRQ_WAKE_THREAD; +} + +static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad) +{ + struct ethosu_job *job = to_ethosu_job(bad); + struct ethosu_device *dev = job->dev; + bool running; + u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr; + u32 cmdaddr; + + cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD); + running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS)); + + if (running) { + int ret; + u32 reg; + + ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD, + reg, + reg != cmdaddr, + USEC_PER_MSEC, 100 * USEC_PER_MSEC); + + /* If still running and progress is being made, just return */ + if (!ret) + return DRM_GPU_SCHED_STAT_NO_HANG; + } + + dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n", + running ? "running" : "stopped", + cmdaddr, bocmds[cmdaddr / 4]); + + drm_sched_stop(&dev->sched, bad); + + scoped_guard(mutex, &dev->job_lock) + dev->in_flight_job = NULL; + + /* Proceed with reset now. 
*/ + pm_runtime_force_suspend(dev->base.dev); + pm_runtime_force_resume(dev->base.dev); + + /* Restart the scheduler */ + drm_sched_start(&dev->sched, 0); + + return DRM_GPU_SCHED_STAT_RESET; +} + +static const struct drm_sched_backend_ops ethosu_sched_ops = { + .run_job = ethosu_job_run, + .timedout_job = ethosu_job_timedout, + .free_job = ethosu_job_free +}; + +int ethosu_job_init(struct ethosu_device *edev) +{ + struct device *dev = edev->base.dev; + struct drm_sched_init_args args = { + .ops = ðosu_sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = 1, + .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .name = dev_name(dev), + .dev = dev, + }; + int ret; + + spin_lock_init(&edev->fence_lock); + ret = devm_mutex_init(dev, &edev->job_lock); + if (ret) + return ret; + ret = devm_mutex_init(dev, &edev->sched_lock); + if (ret) + return ret; + + edev->irq = platform_get_irq(to_platform_device(dev), 0); + if (edev->irq < 0) + return edev->irq; + + ret = devm_request_threaded_irq(dev, edev->irq, + ethosu_job_irq_handler, + ethosu_job_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME, + edev); + if (ret) { + dev_err(dev, "failed to request irq\n"); + return ret; + } + + edev->fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&edev->sched, &args); + if (ret) { + dev_err(dev, "Failed to create scheduler: %d\n", ret); + goto err_sched; + } + + return 0; + +err_sched: + drm_sched_fini(&edev->sched); + return ret; +} + +void ethosu_job_fini(struct ethosu_device *dev) +{ + drm_sched_fini(&dev->sched); +} + +int ethosu_job_open(struct ethosu_file_priv *ethosu_priv) +{ + struct ethosu_device *dev = ethosu_priv->edev; + struct drm_gpu_scheduler *sched = &dev->sched; + int ret; + + ret = drm_sched_entity_init(ðosu_priv->sched_entity, + DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + return WARN_ON(ret); +} + +void ethosu_job_close(struct ethosu_file_priv *ethosu_priv) +{ + struct drm_sched_entity *entity = ðosu_priv->sched_entity; + + 
drm_sched_entity_destroy(entity); +} + +/* + * Validate one userspace job description, look up its BOs and queue it. + * Returns 0 once the job is queued, a negative errno otherwise. + */ +static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, + struct drm_ethosu_job *job) +{ + struct ethosu_device *edev = to_ethosu_device(dev); + struct ethosu_file_priv *file_priv = file->driver_priv; + struct ethosu_job *ejob = NULL; + struct ethosu_validated_cmdstream_info *cmd_info; + int ret = 0; + + /* BO region 2 is reserved if SRAM is used */ + if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size) + return -EINVAL; + + if (edev->npu_info.sram_size < job->sram_size) + return -EINVAL; + + ejob = kzalloc(sizeof(*ejob), GFP_KERNEL); + if (!ejob) + return -ENOMEM; + + kref_init(&ejob->refcount); + + ejob->dev = edev; + ejob->sram_size = job->sram_size; + + ejob->done_fence = kzalloc(sizeof(*ejob->done_fence), GFP_KERNEL); + if (!ejob->done_fence) { + ret = -ENOMEM; + /* The scheduler job is not initialized yet, so skip drm_sched_job_cleanup() */ + goto out_free_job; + } + + ret = drm_sched_job_init(&ejob->base, + &file_priv->sched_entity, + 1, NULL, file->client_id); + if (ret) + goto out_free_job; + + ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo); + if (!ejob->cmd_bo) { + ret = -ENOENT; + goto out_cleanup_job; + } + cmd_info = to_ethosu_bo(ejob->cmd_bo)->info; + if (!cmd_info) { + ret = -EINVAL; + goto out_cleanup_job; + } + + for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) { + struct drm_gem_object *gem; + + /* Can only omit a BO handle if the region is not used or used for SRAM */ + if (!job->region_bo_handles[i] && + (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size))) + continue; + + if (job->region_bo_handles[i] && !cmd_info->region_size[i]) { + dev_err(dev->dev, + "Cmdstream BO handle %d set for unused region %d\n", + job->region_bo_handles[i], i); + ret = -EINVAL; + goto out_cleanup_job; + } + + gem = drm_gem_object_lookup(file, job->region_bo_handles[i]); + if (!gem) { + dev_err(dev->dev, + "Invalid BO handle %d for region %d\n", + job->region_bo_handles[i], i); + ret = -ENOENT; + goto out_cleanup_job; + } + +
ejob->region_bo[ejob->region_cnt] = gem; + ejob->region_bo_num[ejob->region_cnt] = i; + ejob->region_cnt++; + + if (to_ethosu_bo(gem)->info) { + dev_err(dev->dev, + "Cmdstream BO handle %d used for region %d\n", + job->region_bo_handles[i], i); + ret = -EINVAL; + goto out_cleanup_job; + } + + /* Verify the command stream doesn't have accesses outside the BO */ + if (cmd_info->region_size[i] > gem->size) { + dev_err(dev->dev, + "cmd stream region %d size greater than BO size (%llu > %zu)\n", + i, cmd_info->region_size[i], gem->size); + ret = -EOVERFLOW; + goto out_cleanup_job; + } + } + ret = ethosu_job_push(ejob); + if (!ret) { + /* Queued: ethosu_job_push() took a reference for the scheduler, drop ours */ + ethosu_job_put(ejob); + return 0; + } + +out_cleanup_job: + drm_sched_job_cleanup(&ejob->base); +out_free_job: + /* + * The job never reached the scheduler: no runtime PM reference was + * taken and done_fence was never initialized, so ethosu_job_put() + * would unbalance both. Undo by hand instead. + */ + for (int i = 0; i < ejob->region_cnt; i++) + drm_gem_object_put(ejob->region_bo[i]); + drm_gem_object_put(ejob->cmd_bo); + kfree(ejob->done_fence); + kfree(ejob); + + return ret; +} + +int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ethosu_submit *args = data; + int ret = 0; + unsigned int i = 0; + + if (args->pad) { + drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n"); + return -EINVAL; + } + + struct drm_ethosu_job __free(kvfree) *jobs = + kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); + if (!jobs) + return -ENOMEM; + + if (copy_from_user(jobs, + (void __user *)(uintptr_t)args->jobs, + args->job_count * sizeof(*jobs))) { + drm_dbg(dev, "Failed to copy incoming job array\n"); + return -EFAULT; + } + + for (i = 0; i < args->job_count; i++) { + ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/accel/ethosu/ethosu_job.h b/drivers/accel/ethosu/ethosu_job.h new file mode 100644 index 000000000000..ff1cf448d094 --- /dev/null +++ b/drivers/accel/ethosu/ethosu_job.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* Copyright 2024-2025 Tomeu Vizoso */ +/* Copyright 2025 Arm, Ltd.
*/ + +#ifndef __ETHOSU_JOB_H__ +#define __ETHOSU_JOB_H__ + +#include +#include + +struct ethosu_device; +struct ethosu_file_priv; + +struct ethosu_job { + struct drm_sched_job base; + struct ethosu_device *dev; + + struct drm_gem_object *cmd_bo; + struct drm_gem_object *region_bo[NPU_BASEP_REGION_MAX]; + u8 region_bo_num[NPU_BASEP_REGION_MAX]; + u8 region_cnt; + u32 sram_size; + + /* Fence to be signaled by drm-sched once its done with the job */ + struct dma_fence *inference_done_fence; + + /* Fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + struct kref refcount; +}; + +int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file); + +int ethosu_job_init(struct ethosu_device *dev); +void ethosu_job_fini(struct ethosu_device *dev); +int ethosu_job_open(struct ethosu_file_priv *ethosu_priv); +void ethosu_job_close(struct ethosu_file_priv *ethosu_priv); + +#endif diff --git a/include/uapi/drm/ethosu_accel.h b/include/uapi/drm/ethosu_accel.h new file mode 100644 index 000000000000..af78bb4686d7 --- /dev/null +++ b/include/uapi/drm/ethosu_accel.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (C) 2025 Arm, Ltd. */ +#ifndef _ETHOSU_DRM_H_ +#define _ETHOSU_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: IOCTL IDs + * + * enum drm_ethosu_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_ETHOSU_xxx + * definitions instead. + */ +enum drm_ethosu_ioctl_id { + /** @DRM_ETHOSU_DEV_QUERY: Query device information. */ + DRM_ETHOSU_DEV_QUERY = 0, + + /** @DRM_ETHOSU_BO_CREATE: Create a buffer object. */ + DRM_ETHOSU_BO_CREATE, + + /** @DRM_ETHOSU_BO_WAIT: Wait on a buffer object's fence. */ + DRM_ETHOSU_BO_WAIT, + + /** + * @DRM_ETHOSU_BO_MMAP_OFFSET: Get the file offset to pass to + * mmap to map a GEM object. 
+ */ + DRM_ETHOSU_BO_MMAP_OFFSET, + + /** + * @DRM_ETHOSU_CMDSTREAM_BO_CREATE: Create a command stream buffer + * object. + */ + DRM_ETHOSU_CMDSTREAM_BO_CREATE, + + /** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */ + DRM_ETHOSU_SUBMIT, +}; + +/** + * DOC: IOCTL arguments + */ + +/** + * enum drm_ethosu_dev_query_type - Query type + * + * Place new types at the end, don't re-order, don't remove or replace. + */ +enum drm_ethosu_dev_query_type { + /** @DRM_ETHOSU_DEV_QUERY_NPU_INFO: Query NPU information. */ + DRM_ETHOSU_DEV_QUERY_NPU_INFO = 0, +}; + +/** + * struct drm_ethosu_npu_info - NPU information + * + * Structure grouping all queryable information relating to the NPU. + */ +struct drm_ethosu_npu_info { + /** @id: NPU ID, decoded with the DRM_ETHOSU_ARCH_*, PRODUCT_* and VERSION_* macros below. */ + __u32 id; +#define DRM_ETHOSU_ARCH_MAJOR(x) ((x) >> 28) +#define DRM_ETHOSU_ARCH_MINOR(x) (((x) >> 20) & 0xff) +#define DRM_ETHOSU_ARCH_PATCH(x) (((x) >> 16) & 0xf) +#define DRM_ETHOSU_PRODUCT_MAJOR(x) (((x) >> 12) & 0xf) +#define DRM_ETHOSU_VERSION_MAJOR(x) (((x) >> 8) & 0xf) +#define DRM_ETHOSU_VERSION_MINOR(x) (((x) >> 4) & 0xf) +#define DRM_ETHOSU_VERSION_STATUS(x) ((x) & 0xf) + + /** @config: NPU configuration word. */ + __u32 config; + + /** @sram_size: SRAM size; a job requesting more than this is rejected. */ + __u32 sram_size; +}; + +/** + * struct drm_ethosu_dev_query - Arguments passed to DRM_IOCTL_ETHOSU_DEV_QUERY + */ +struct drm_ethosu_dev_query { + /** @type: the query type (see drm_ethosu_dev_query_type). */ + __u32 type; + + /** + * @size: size of the type being queried. + * + * If pointer is NULL, size is updated by the driver to provide the + * output structure size. If pointer is not NULL, the driver will + * only copy min(size, actual_structure_size) bytes to the pointer, + * and update the size accordingly. This allows us to extend query + * types without breaking userspace. + */ + __u32 size; + + /** + * @pointer: user pointer to a query type struct. + * + * Pointer can be NULL, in which case, nothing is copied, but the + * actual structure size is returned.
If not NULL, it must point to + * a location that's large enough to hold size bytes. + */ + __u64 pointer; +}; + +/** + * enum drm_ethosu_bo_flags - Buffer object flags, passed at creation time. + */ +enum drm_ethosu_bo_flags { + /** + * @DRM_ETHOSU_BO_NO_MMAP: The buffer object will never be CPU-mapped + * in userspace. + */ + DRM_ETHOSU_BO_NO_MMAP = (1 << 0), +}; + +/** + * struct drm_ethosu_bo_create - Arguments passed to DRM_IOCTL_ETHOSU_BO_CREATE. + */ +struct drm_ethosu_bo_create { + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags. Must be a combination of drm_ethosu_bo_flags flags. + */ + __u32 flags; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + +/** + * struct drm_ethosu_bo_mmap_offset - Arguments passed to DRM_IOCTL_ETHOSU_BO_MMAP_OFFSET. + */ +struct drm_ethosu_bo_mmap_offset { + /** @handle: Handle of the object we want an mmap offset for. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @offset: The fake offset to use for subsequent mmap calls. */ + __u64 offset; +}; + +/** + * struct drm_ethosu_bo_wait - ioctl argument for waiting for + * completion of the last DRM_ETHOSU_SUBMIT on a BO. + * + * This is useful for cases where multiple processes might be + * running jobs on a BO and you want to wait for all jobs to be + * completed. + */ +struct drm_ethosu_bo_wait { + __u32 handle; + __u32 pad; + __s64 timeout_ns; /* absolute */ +}; + +struct drm_ethosu_cmdstream_bo_create { + /* Size of the data argument. */ + __u32 size; + + /* Flags, currently must be 0. */ + __u32 flags; + + /* Pointer to the data. */ + __u64 data; + + /** Returned GEM handle for the BO. */ + __u32 handle; + + /* Pad, must be 0.
*/ + __u32 pad; +}; + +/** + * struct drm_ethosu_job - A job to be run on the NPU + * + * The kernel will schedule the execution of this job taking into account its + * dependencies with other jobs. All tasks in the same job will be executed + * sequentially on the same core, to benefit from memory residency in SRAM. + */ +struct drm_ethosu_job { + /** Input: BO handle for cmdstream. */ + __u32 cmd_bo; + + /** Input: Amount of SRAM to use. */ + __u32 sram_size; + +#define ETHOSU_MAX_REGIONS 8 + /** Input: Array of BO handles for each region. */ + __u32 region_bo_handles[ETHOSU_MAX_REGIONS]; +}; + +/** + * struct drm_ethosu_submit - ioctl argument for submitting commands to the NPU. + * + * The kernel will schedule the execution of these jobs in dependency order. + */ +struct drm_ethosu_submit { + /** Input: Pointer to an array of struct drm_ethosu_job. */ + __u64 jobs; + + /** Input: Number of jobs passed in. */ + __u32 job_count; + + /** Reserved, must be zero. */ + __u32 pad; +}; + +/** + * DRM_IOCTL_ETHOSU() - Build a ethosu IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_ETHOSU_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_ETHOSU_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. 
+ */ +#define DRM_IOCTL_ETHOSU(__access, __id, __type) \ + DRM_IO ## __access(DRM_COMMAND_BASE + DRM_ETHOSU_ ## __id, \ + struct drm_ethosu_ ## __type) + +enum { + DRM_IOCTL_ETHOSU_DEV_QUERY = + DRM_IOCTL_ETHOSU(WR, DEV_QUERY, dev_query), + DRM_IOCTL_ETHOSU_BO_CREATE = + DRM_IOCTL_ETHOSU(WR, BO_CREATE, bo_create), + DRM_IOCTL_ETHOSU_BO_WAIT = + DRM_IOCTL_ETHOSU(WR, BO_WAIT, bo_wait), + DRM_IOCTL_ETHOSU_BO_MMAP_OFFSET = + DRM_IOCTL_ETHOSU(WR, BO_MMAP_OFFSET, bo_mmap_offset), + DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE = + DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create), + DRM_IOCTL_ETHOSU_SUBMIT = + DRM_IOCTL_ETHOSU(WR, SUBMIT, submit), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _ETHOSU_DRM_H_ */ -- cgit v1.2.3 From 57557964b582238d5ee4b8538d1c4694f91c2186 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 29 Oct 2025 10:17:52 +0100 Subject: accel/ivpu: Add support for userptr buffer objects Introduce a new ioctl `drm_ivpu_bo_create_from_userptr` that allows users to create GEM buffer objects from user pointers to memory regions. The user pointer must be page-aligned and the memory region must remain valid for the buffer object's lifetime. Userptr buffers enable direct use of mmapped files (e.g. inference weights) in NPU workloads without copying data to NPU buffer objects. This reduces memory usage and provides better flexibility for NPU applications. 
Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeff Hugo Signed-off-by: Karol Wachowski Link: https://patch.msgid.link/20251029091752.203198-1-karol.wachowski@linux.intel.com --- drivers/accel/ivpu/Makefile | 1 + drivers/accel/ivpu/ivpu_drv.c | 3 + drivers/accel/ivpu/ivpu_gem.c | 2 +- drivers/accel/ivpu/ivpu_gem.h | 7 ++ drivers/accel/ivpu/ivpu_gem_userptr.c | 202 ++++++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_mmu_context.c | 4 +- drivers/accel/ivpu/ivpu_mmu_context.h | 2 +- include/uapi/drm/ivpu_accel.h | 52 +++++++++ 8 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_gem_userptr.c (limited to 'include/uapi') diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 1029e0bab061..dbf76b8a5b4c 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -6,6 +6,7 @@ intel_vpu-y := \ ivpu_fw.o \ ivpu_fw_log.o \ ivpu_gem.o \ + ivpu_gem_userptr.o \ ivpu_hw.o \ ivpu_hw_btrs.o \ ivpu_hw_ip.o \ diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index c6fe7a408912..ca68730dee88 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -134,6 +134,8 @@ bool ivpu_is_capable(struct ivpu_device *vdev, u32 capability) return true; case DRM_IVPU_CAP_DMA_MEMORY_RANGE: return true; + case DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR: + return true; case DRM_IVPU_CAP_MANAGE_CMDQ: return vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW; default: @@ -313,6 +315,7 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_CMDQ_CREATE, ivpu_cmdq_create_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_CMDQ_DESTROY, ivpu_cmdq_destroy_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_CMDQ_SUBMIT, ivpu_cmdq_submit_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE_FROM_USERPTR, ivpu_bo_create_from_userptr_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 
7353cfb73bcb..03d39615ad37 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -96,7 +96,7 @@ int __must_check ivpu_bo_bind(struct ivpu_bo *bo) if (!bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, - ivpu_bo_is_snooped(bo)); + ivpu_bo_is_snooped(bo), ivpu_bo_is_read_only(bo)); if (ret) { ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); goto unlock; diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 54452eb8a41f..2dcd7eba9cb7 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -38,6 +38,8 @@ void ivpu_bo_free(struct ivpu_bo *bo); int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); void ivpu_bo_list_print(struct drm_device *dev); @@ -75,6 +77,11 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } +static inline bool ivpu_bo_is_read_only(struct ivpu_bo *bo) +{ + return bo->flags & DRM_IVPU_BO_READ_ONLY; +} + static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) { if (vpu_addr < bo->vpu_addr) diff --git a/drivers/accel/ivpu/ivpu_gem_userptr.c b/drivers/accel/ivpu/ivpu_gem_userptr.c new file mode 100644 index 000000000000..235c67959453 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_gem_userptr.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2025 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ivpu_drv.h" +#include "ivpu_gem.h" + 
+static struct sg_table * +ivpu_gem_userptr_dmabuf_map(struct dma_buf_attachment *attachment, + enum dma_data_direction direction) +{ + struct sg_table *sgt = attachment->dmabuf->priv; + int ret; + + ret = dma_map_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC); + if (ret) + return ERR_PTR(ret); + + return sgt; +} + +static void ivpu_gem_userptr_dmabuf_unmap(struct dma_buf_attachment *attachment, + struct sg_table *sgt, + enum dma_data_direction direction) +{ + dma_unmap_sgtable(attachment->dev, sgt, direction, DMA_ATTR_SKIP_CPU_SYNC); +} + +static void ivpu_gem_userptr_dmabuf_release(struct dma_buf *dma_buf) +{ + struct sg_table *sgt = dma_buf->priv; + struct sg_page_iter page_iter; + struct page *page; + + for_each_sgtable_page(sgt, &page_iter, 0) { + page = sg_page_iter_page(&page_iter); + unpin_user_page(page); + } + + sg_free_table(sgt); + kfree(sgt); +} + +static const struct dma_buf_ops ivpu_gem_userptr_dmabuf_ops = { + .map_dma_buf = ivpu_gem_userptr_dmabuf_map, + .unmap_dma_buf = ivpu_gem_userptr_dmabuf_unmap, + .release = ivpu_gem_userptr_dmabuf_release, +}; + +static struct dma_buf * +ivpu_create_userptr_dmabuf(struct ivpu_device *vdev, void __user *user_ptr, + size_t size, uint32_t flags) +{ + struct dma_buf_export_info exp_info = {}; + struct dma_buf *dma_buf; + struct sg_table *sgt; + struct page **pages; + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned int gup_flags = FOLL_LONGTERM; + int ret, i, pinned; + + /* Add FOLL_WRITE only if the BO is not read-only */ + if (!(flags & DRM_IVPU_BO_READ_ONLY)) + gup_flags |= FOLL_WRITE; + + pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); + + pinned = pin_user_pages_fast((unsigned long)user_ptr, nr_pages, gup_flags, pages); + if (pinned < 0) { + ret = pinned; + ivpu_warn(vdev, "Failed to pin user pages: %d\n", ret); + goto free_pages_array; + } + + if (pinned != nr_pages) { + ivpu_warn(vdev, "Pinned %d pages, expected %lu\n", 
pinned, nr_pages); + ret = -EFAULT; + goto unpin_pages; + } + + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto unpin_pages; + } + + ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, 0, size, GFP_KERNEL); + if (ret) { + ivpu_warn(vdev, "Failed to create sg table: %d\n", ret); + goto free_sgt; + } + + exp_info.exp_name = "ivpu_userptr_dmabuf"; + exp_info.owner = THIS_MODULE; + exp_info.ops = &ivpu_gem_userptr_dmabuf_ops; + exp_info.size = size; + exp_info.flags = O_RDWR | O_CLOEXEC; + exp_info.priv = sgt; + + dma_buf = dma_buf_export(&exp_info); + if (IS_ERR(dma_buf)) { + ret = PTR_ERR(dma_buf); + ivpu_warn(vdev, "Failed to export userptr dma-buf: %d\n", ret); + goto free_sg_table; + } + + kvfree(pages); + return dma_buf; + +free_sg_table: + sg_free_table(sgt); +free_sgt: + kfree(sgt); +unpin_pages: + for (i = 0; i < pinned; i++) + unpin_user_page(pages[i]); +free_pages_array: + kvfree(pages); + return ERR_PTR(ret); +} + +static struct ivpu_bo * +ivpu_bo_create_from_userptr(struct ivpu_device *vdev, void __user *user_ptr, + size_t size, uint32_t flags) +{ + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct ivpu_bo *bo; + + dma_buf = ivpu_create_userptr_dmabuf(vdev, user_ptr, size, flags); + if (IS_ERR(dma_buf)) + return ERR_CAST(dma_buf); + + obj = ivpu_gem_prime_import(&vdev->drm, dma_buf); + if (IS_ERR(obj)) { + dma_buf_put(dma_buf); + return ERR_CAST(obj); + } + + dma_buf_put(dma_buf); + + bo = to_ivpu_bo(obj); + bo->flags = flags; + + return bo; +} + +int ivpu_bo_create_from_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_bo_create_from_userptr *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = to_ivpu_device(dev); + void __user *user_ptr = u64_to_user_ptr(args->user_ptr); + struct ivpu_bo *bo; + int ret; + + if (args->flags & ~(DRM_IVPU_BO_HIGH_MEM | DRM_IVPU_BO_DMA_MEM | DRM_IVPU_BO_READ_ONLY)) + return -EINVAL; + + if 
(!args->user_ptr || !args->size) + return -EINVAL; + + if (!PAGE_ALIGNED(args->user_ptr) || !PAGE_ALIGNED(args->size)) + return -EINVAL; + + if (!access_ok(user_ptr, args->size)) + return -EFAULT; + + bo = ivpu_bo_create_from_userptr(vdev, user_ptr, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) { + ivpu_err(vdev, "Failed to create handle for BO: %pe (ctx %u size %llu flags 0x%x)", + bo, file_priv->ctx.id, args->size, args->flags); + } else { + ivpu_dbg(vdev, BO, "Created userptr BO: handle=%u vpu_addr=0x%llx size=%llu flags=0x%x\n", + args->handle, bo->vpu_addr, args->size, bo->flags); + args->vpu_addr = bo->vpu_addr; + } + + drm_gem_object_put(&bo->base.base); + + return ret; +} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 4ffc783426be..d128e8961688 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -430,7 +430,7 @@ static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_a int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only) { size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; @@ -450,6 +450,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, prot = IVPU_MMU_ENTRY_MAPPED; if (llc_coherent) prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT; + if (read_only) + prot |= IVPU_MMU_ENTRY_FLAG_RO; mutex_lock(&ctx->lock); diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index f255310968cf..663a11a9db11 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -42,7 +42,7 @@ int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu void 
ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node); int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, - u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); + u64 vpu_addr, struct sg_table *sgt, bool llc_coherent, bool read_only); void ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt); int ivpu_mmu_context_set_pages_ro(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index e470b0221e02..264505d54f93 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -25,6 +25,7 @@ extern "C" { #define DRM_IVPU_CMDQ_CREATE 0x0b #define DRM_IVPU_CMDQ_DESTROY 0x0c #define DRM_IVPU_CMDQ_SUBMIT 0x0d +#define DRM_IVPU_BO_CREATE_FROM_USERPTR 0x0e #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -69,6 +70,10 @@ extern "C" { #define DRM_IOCTL_IVPU_CMDQ_SUBMIT \ DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_SUBMIT, struct drm_ivpu_cmdq_submit) +#define DRM_IOCTL_IVPU_BO_CREATE_FROM_USERPTR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE_FROM_USERPTR, \ + struct drm_ivpu_bo_create_from_userptr) + /** * DOC: contexts * @@ -127,6 +132,13 @@ extern "C" { * command queue destroy and submit job on specific command queue. */ #define DRM_IVPU_CAP_MANAGE_CMDQ 3 +/** + * DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR + * + * Driver supports creating buffer objects from user space memory pointers. + * This allows creating GEM buffers from existing user memory regions. 
+ */ +#define DRM_IVPU_CAP_BO_CREATE_FROM_USERPTR 4 /** * struct drm_ivpu_param - Get/Set VPU parameters @@ -194,6 +206,7 @@ struct drm_ivpu_param { #define DRM_IVPU_BO_HIGH_MEM DRM_IVPU_BO_SHAVE_MEM #define DRM_IVPU_BO_MAPPABLE 0x00000002 #define DRM_IVPU_BO_DMA_MEM 0x00000004 +#define DRM_IVPU_BO_READ_ONLY 0x00000008 #define DRM_IVPU_BO_CACHED 0x00000000 #define DRM_IVPU_BO_UNCACHED 0x00010000 @@ -204,6 +217,7 @@ struct drm_ivpu_param { (DRM_IVPU_BO_HIGH_MEM | \ DRM_IVPU_BO_MAPPABLE | \ DRM_IVPU_BO_DMA_MEM | \ + DRM_IVPU_BO_READ_ONLY | \ DRM_IVPU_BO_CACHE_MASK) /** @@ -255,6 +269,44 @@ struct drm_ivpu_bo_create { __u64 vpu_addr; }; +/** + * struct drm_ivpu_bo_create_from_userptr - Create dma-buf from user pointer + * + * Create a GEM buffer object from a user pointer to a memory region. + */ +struct drm_ivpu_bo_create_from_userptr { + /** @user_ptr: User pointer to memory region (must be page aligned) */ + __u64 user_ptr; + + /** @size: Size of the memory region in bytes (must be page aligned) */ + __u64 size; + + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_BO_HIGH_MEM: + * + * Allocate VPU address from >4GB range. + * + * %DRM_IVPU_BO_DMA_MEM: + * + * Allocate from DMA memory range accessible by hardware DMA. + * + * %DRM_IVPU_BO_READ_ONLY: + * + * Allocate as a read-only buffer object. + */ + __u32 flags; + + /** @handle: Returned GEM object handle */ + __u32 handle; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; +}; + /** * struct drm_ivpu_bo_info - Query buffer object info */ -- cgit v1.2.3 From 1556c170d2f78344a9eee567fbfcee4651689813 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 3 Nov 2025 22:25:44 -0800 Subject: accel/amdxdna: Add IOCTL parameter for resource data Extend DRM_IOCTL_AMDXDNA_GET_INFO to include additional parameters that allow collection of resource data. 
Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251104062546.833771-2-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 6 ------ drivers/accel/amdxdna/aie2_message.c | 2 ++ drivers/accel/amdxdna/aie2_pci.c | 27 +++++++++++++++++++++++++++ drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 ++- include/uapi/drm/amdxdna_accel.h | 17 +++++++++++++++++ 5 files changed, 48 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 289a2aaf4cae..b78c47ed0d34 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -556,7 +556,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) struct drm_gpu_scheduler *sched; struct amdxdna_hwctx_priv *priv; struct amdxdna_gem_obj *heap; - struct amdxdna_dev_hdl *ndev; int i, ret; priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); @@ -654,8 +653,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) amdxdna_pm_suspend_put(xdna); hwctx->status = HWCTX_STAT_INIT; - ndev = xdna->dev_handle; - ndev->hwctx_num++; init_waitqueue_head(&priv->job_free_wq); XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -688,13 +685,10 @@ free_priv: void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) { - struct amdxdna_dev_hdl *ndev; struct amdxdna_dev *xdna; int idx; xdna = hwctx->client->xdna; - ndev = xdna->dev_handle; - ndev->hwctx_num--; XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); drm_sched_entity_destroy(&hwctx->priv->entity); diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 339dec998247..39214253d804 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -235,6 +235,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct ret = -EINVAL; goto out_destroy_context; } + ndev->hwctx_num++; XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", 
hwctx->name, ret, resp.msix_id); @@ -269,6 +270,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc hwctx->fw_ctx_id); hwctx->priv->mbox_chann = NULL; hwctx->fw_ctx_id = -1; + ndev->hwctx_num--; return ret; } diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index ce57b915004e..396dc6e06007 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -838,6 +838,30 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client, return 0; } +static int aie2_query_resource_info(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_resource_info res_info; + const struct amdxdna_dev_priv *priv; + struct amdxdna_dev_hdl *ndev; + struct amdxdna_dev *xdna; + + xdna = client->xdna; + ndev = xdna->dev_handle; + priv = ndev->priv; + + res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk; + res_info.npu_tops_max = ndev->max_tops; + res_info.npu_task_max = priv->hwctx_limit; + res_info.npu_tops_curr = ndev->curr_tops; + res_info.npu_task_curr = ndev->hwctx_num; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &res_info, sizeof(res_info))) + return -EFAULT; + + return 0; +} + static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { struct amdxdna_dev *xdna = client->xdna; @@ -872,6 +896,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case DRM_AMDXDNA_GET_POWER_MODE: ret = aie2_get_power_mode(client, args); break; + case DRM_AMDXDNA_QUERY_RESOURCE_INFO: + ret = aie2_query_resource_info(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 3599e713bfcb..af943a603ad1 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -29,9 +29,10 @@ 
MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); * 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY * 0.2: Support getting last error hardware error * 0.3: Support firmware debug buffer + * 0.4: Support getting resource information */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 3 +#define AMDXDNA_DRIVER_MINOR 4 /* * Bind the driver base on (vendor_id, device_id) pair and later use the diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index c7eec9ceb2ae..8b679c38d308 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -442,6 +442,23 @@ enum amdxdna_drm_get_param { DRM_AMDXDNA_QUERY_HW_CONTEXTS, DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, DRM_AMDXDNA_GET_POWER_MODE, + DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12, +}; + +/** + * struct amdxdna_drm_get_resource_info - Get resource information + */ +struct amdxdna_drm_get_resource_info { + /** @npu_clk_max: max H-Clocks */ + __u64 npu_clk_max; + /** @npu_tops_max: max TOPs */ + __u64 npu_tops_max; + /** @npu_task_max: max number of tasks */ + __u64 npu_task_max; + /** @npu_tops_curr: current TOPs */ + __u64 npu_tops_curr; + /** @npu_task_curr: current number of tasks */ + __u64 npu_task_curr; }; /** -- cgit v1.2.3 From e568dc3e625d818f199bd085005213cce3271453 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 3 Nov 2025 22:25:45 -0800 Subject: accel/amdxdna: Add IOCTL parameter for telemetry data Extend DRM_IOCTL_AMDXDNA_GET_INFO to include additional parameters that allow collection of telemetry data. 
Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251104062546.833771-3-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 56 +++++++++++++++++--- drivers/accel/amdxdna/aie2_msg_priv.h | 25 ++++++++- drivers/accel/amdxdna/aie2_pci.c | 73 ++++++++++++++++++++++++++ drivers/accel/amdxdna/aie2_pci.h | 3 ++ drivers/accel/amdxdna/amdxdna_mailbox_helper.h | 6 ++- drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +- include/uapi/drm/amdxdna_accel.h | 17 ++++++ 7 files changed, 173 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 39214253d804..69cdce9ff208 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -47,7 +47,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, ndev->mgmt_chann = NULL; } - if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) { + if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", msg->opcode, *hdl->data); ret = -EINVAL; @@ -336,11 +336,6 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, goto fail; } - if (resp.status != AIE2_STATUS_SUCCESS) { - XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status); - ret = -EINVAL; - goto fail; - } XDNA_DBG(xdna, "Query NPU status completed"); if (size < resp.size) { @@ -362,6 +357,55 @@ fail: return ret; } +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header) +{ + DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY); + struct amdxdna_dev *xdna = ndev->xdna; + dma_addr_t dma_addr; + u8 *addr; + int ret; + + if (header->type >= MAX_TELEMETRY_TYPE) + return -EINVAL; + + addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + req.buf_addr = dma_addr; + req.buf_size 
= size; + req.type = header->type; + + drm_clflush_virt_range(addr, size); /* device can access */ + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed, status %d", ret); + goto free_buf; + } + + if (size < resp.size) { + ret = -EINVAL; + XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size); + goto free_buf; + } + + if (copy_to_user(buf, addr, resp.size)) { + ret = -EFAULT; + XDNA_ERR(xdna, "Failed to copy telemetry to user space"); + goto free_buf; + } + + header->major = resp.major; + header->minor = resp.minor; + +free_buf: + dma_free_noncoherent(xdna->ddev.dev, size, addr, dma_addr, DMA_FROM_DEVICE); + return ret; +} + int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, void *handle, int (*cb)(void*, void __iomem *, size_t)) { diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 945140011763..947daa63f064 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -9,7 +9,8 @@ enum aie2_msg_opcode { MSG_OP_CREATE_CONTEXT = 0x2, MSG_OP_DESTROY_CONTEXT = 0x3, - MSG_OP_SYNC_BO = 0x7, + MSG_OP_GET_TELEMETRY = 0x4, + MSG_OP_SYNC_BO = 0x7, MSG_OP_EXECUTE_BUFFER_CF = 0xC, MSG_OP_QUERY_COL_STATUS = 0xD, MSG_OP_QUERY_AIE_TILE_INFO = 0xE, @@ -137,6 +138,28 @@ struct destroy_ctx_resp { enum aie2_msg_status status; } __packed; +enum telemetry_type { + TELEMETRY_TYPE_DISABLED, + TELEMETRY_TYPE_HEALTH, + TELEMETRY_TYPE_ERROR_INFO, + TELEMETRY_TYPE_PROFILING, + TELEMETRY_TYPE_DEBUG, + MAX_TELEMETRY_TYPE +}; + +struct get_telemetry_req { + enum telemetry_type type; + __u64 buf_addr; + __u32 buf_size; +} __packed; + +struct get_telemetry_resp { + __u32 major; + __u32 minor; + __u32 size; + enum aie2_msg_status status; +} __packed; + struct execute_buffer_req { __u32 cu_idx; __u32 payload[19]; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 
396dc6e06007..d7ccbdaf47f5 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -862,6 +862,76 @@ static int aie2_query_resource_info(struct amdxdna_client *client, return 0; } +static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + u32 *map = arg; + + if (hwctx->fw_ctx_id >= xdna->dev_handle->priv->hwctx_limit) { + XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id, + xdna->dev_handle->priv->hwctx_limit); + return -EINVAL; + } + + map[hwctx->fw_ctx_id] = hwctx->id; + return 0; +} + +static int aie2_get_telemetry(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL; + u32 telemetry_data_sz, header_sz, elem_num; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_client *tmp_client; + int ret; + + elem_num = xdna->dev_handle->priv->hwctx_limit; + header_sz = struct_size(header, map, elem_num); + if (args->buffer_size <= header_sz) { + XDNA_ERR(xdna, "Invalid buffer size"); + return -EINVAL; + } + + telemetry_data_sz = args->buffer_size - header_sz; + if (telemetry_data_sz > SZ_4M) { + XDNA_ERR(xdna, "Buffer size is too big, %d", telemetry_data_sz); + return -EINVAL; + } + + header = kzalloc(header_sz, GFP_KERNEL); + if (!header) + return -ENOMEM; + + if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) { + XDNA_ERR(xdna, "Failed to copy telemetry header from user"); + return -EFAULT; + } + + header->map_num_elements = elem_num; + list_for_each_entry(tmp_client, &xdna->client_list, node) { + ret = amdxdna_hwctx_walk(tmp_client, &header->map, + aie2_fill_hwctx_map); + if (ret) + return ret; + } + + ret = aie2_query_telemetry(xdna->dev_handle, + u64_to_user_ptr(args->buffer + header_sz), + telemetry_data_sz, header); + if (ret) { + XDNA_ERR(xdna, "Query telemetry failed ret %d", ret); + return ret; + } + + if 
(copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) { + XDNA_ERR(xdna, "Copy header failed"); + return -EFAULT; + } + + return 0; +} + static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { struct amdxdna_dev *xdna = client->xdna; @@ -896,6 +966,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case DRM_AMDXDNA_GET_POWER_MODE: ret = aie2_get_power_mode(client, args); break; + case DRM_AMDXDNA_QUERY_TELEMETRY: + ret = aie2_get_telemetry(client, args); + break; case DRM_AMDXDNA_QUERY_RESOURCE_INFO: ret = aie2_query_resource_info(client, args); break; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index a79f4f71ff6b..9793cd1e0c55 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -305,6 +305,9 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); +int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, + char __user *buf, u32 size, + struct amdxdna_drm_query_telemetry_header *header); int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, void *handle, int (*cb)(void*, void __iomem *, size_t)); int aie2_config_cu(struct amdxdna_hwctx *hwctx, diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h index 710ff8873d61..556c712cad0a 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.h +++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.h @@ -16,16 +16,18 @@ struct xdna_notify { u32 *data; size_t size; int error; + u32 *status; }; -#define DECLARE_XDNA_MSG_COMMON(name, op, status) \ +#define DECLARE_XDNA_MSG_COMMON(name, op, s) \ 
struct name##_req req = { 0 }; \ - struct name##_resp resp = { status }; \ + struct name##_resp resp = { .status = s }; \ struct xdna_notify hdl = { \ .error = 0, \ .data = (u32 *)&resp, \ .size = sizeof(resp), \ .comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp), \ + .status = (u32 *)&resp.status, \ }; \ struct xdna_mailbox_msg msg = { \ .send_data = (u8 *)&req, \ diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index af943a603ad1..7590265d4485 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -30,9 +30,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); * 0.2: Support getting last error hardware error * 0.3: Support firmware debug buffer * 0.4: Support getting resource information + * 0.5: Support getting telemetry data */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 4 +#define AMDXDNA_DRIVER_MINOR 5 /* * Bind the driver base on (vendor_id, device_id) pair and later use the diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 8b679c38d308..8ad254bc35a5 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -442,6 +442,7 @@ enum amdxdna_drm_get_param { DRM_AMDXDNA_QUERY_HW_CONTEXTS, DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, DRM_AMDXDNA_GET_POWER_MODE, + DRM_AMDXDNA_QUERY_TELEMETRY, DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12, }; @@ -461,6 +462,22 @@ struct amdxdna_drm_get_resource_info { __u64 npu_task_curr; }; +/** + * struct amdxdna_drm_query_telemetry_header - Telemetry data header + */ +struct amdxdna_drm_query_telemetry_header { + /** @major: Firmware telemetry interface major version number */ + __u32 major; + /** @minor: Firmware telemetry interface minor version number */ + __u32 minor; + /** @type: Telemetry query type */ + __u32 type; + /** @map_num_elements: Total number of elements in the map table */ + __u32 map_num_elements; + /** @map: Element map */ + __u32 map[]; +}; + /** * struct 
amdxdna_drm_get_info - Get some information from the AIE hardware. * @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed in the buffer. -- cgit v1.2.3 From 3a0ff7b98af4a5de1b995dfb57e65843f9b7b628 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 4 Nov 2025 10:53:39 -0800 Subject: accel/amdxdna: Support preemption requests The driver checks the firmware version during initialization. If preemption is supported, the driver configures preemption accordingly and handles userspace preemption requests. Otherwise, the driver returns an error for userspace preemption requests. Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251104185340.897560-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 95 +++++++++++++++++++++++++++++++++ drivers/accel/amdxdna/aie2_msg_priv.h | 3 ++ drivers/accel/amdxdna/aie2_pci.c | 63 ++++++++++++++++++++++ drivers/accel/amdxdna/aie2_pci.h | 8 +++ drivers/accel/amdxdna/amdxdna_ctx.h | 17 ++++++ drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +- drivers/accel/amdxdna/npu4_regs.c | 4 ++ include/uapi/drm/amdxdna_accel.h | 16 +++++- 8 files changed, 207 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 69cdce9ff208..d493bb1c3360 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -210,6 +210,14 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct hwctx->fw_ctx_id = resp.context_id; WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + if (ndev->force_preempt_enabled) { + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id); + if (ret) { + XDNA_ERR(xdna, "failed to enable force preempt %d", ret); + return ret; + } + } + cq_pair = &resp.cq_pair[0]; x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, 
cq_pair->x2i_q.tail_addr); @@ -601,6 +609,11 @@ aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) return 0; } +static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + return -EOPNOTSUPP; +} + static u32 aie2_get_chain_msg_op(u32 cmd_op) { switch (cmd_op) { @@ -621,6 +634,8 @@ static struct aie2_exec_msg_ops legacy_exec_message_ops = { .init_chain_req = aie2_init_exec_chain_req, .fill_cf_slot = aie2_cmdlist_fill_cf, .fill_dpu_slot = aie2_cmdlist_fill_dpu, + .fill_preempt_slot = aie2_cmdlist_unsupp, + .fill_elf_slot = aie2_cmdlist_unsupp, .get_chain_msg_op = aie2_get_chain_msg_op, }; @@ -680,6 +695,74 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si return 0; } +static int +aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu *npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); + if (npu_slot->cu_idx == INVALID_CU_IDX) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_PREEMPT; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = arg_sz / sizeof(u32); + memcpy(npu_slot->args, pd->prop_args, arg_sz); + + *size = sizeof(*npu_slot) + arg_sz; + return 0; +} + +static int +aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size) +{ + struct cmd_chain_slot_npu 
*npu_slot = slot; + struct amdxdna_cmd_preempt_data *pd; + u32 cmd_len; + u32 arg_sz; + + pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); + arg_sz = cmd_len - sizeof(*pd); + if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) + return -EINVAL; + + if (*size < sizeof(*npu_slot) + arg_sz) + return -EINVAL; + + memset(npu_slot, 0, sizeof(*npu_slot)); + npu_slot->type = EXEC_NPU_TYPE_ELF; + npu_slot->inst_buf_addr = pd->inst_buf; + npu_slot->save_buf_addr = pd->save_buf; + npu_slot->restore_buf_addr = pd->restore_buf; + npu_slot->inst_size = pd->inst_size; + npu_slot->save_size = pd->save_size; + npu_slot->restore_size = pd->restore_size; + npu_slot->inst_prop_cnt = pd->inst_prop_cnt; + npu_slot->arg_cnt = 1; + npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN; + + *size = struct_size(npu_slot, args, npu_slot->arg_cnt); + return 0; +} + static u32 aie2_get_npu_chain_msg_op(u32 cmd_op) { return MSG_OP_CHAIN_EXEC_NPU; @@ -691,6 +774,8 @@ static struct aie2_exec_msg_ops npu_exec_message_ops = { .init_chain_req = aie2_init_npu_chain_req, .fill_cf_slot = aie2_cmdlist_fill_npu_cf, .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu, + .fill_preempt_slot = aie2_cmdlist_fill_npu_preempt, + .fill_elf_slot = aie2_cmdlist_fill_npu_elf, .get_chain_msg_op = aie2_get_npu_chain_msg_op, }; @@ -749,6 +834,16 @@ aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo, case ERT_START_NPU: ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size); break; + case ERT_START_NPU_PREEMPT: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size); + break; + case ERT_START_NPU_PREEMPT_ELF: + if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT)) + return -EOPNOTSUPP; + ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size); + break; default: XDNA_INFO(xdna, "Unsupported op %d", op); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h 
index 947daa63f064..1c957a6298d3 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -176,6 +176,8 @@ struct exec_dpu_req { enum exec_npu_type { EXEC_NPU_TYPE_NON_ELF = 0x1, EXEC_NPU_TYPE_PARTIAL_ELF = 0x2, + EXEC_NPU_TYPE_PREEMPT = 0x3, + EXEC_NPU_TYPE_ELF = 0x4, }; union exec_req { @@ -372,6 +374,7 @@ struct cmd_chain_slot_dpu { }; #define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32)) +#define AIE2_EXEC_BUFFER_KERNEL_OP_TXN 3 struct cmd_chain_slot_npu { enum exec_npu_type type; u64 inst_buf_addr; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index d7ccbdaf47f5..ceef1c502e9e 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -183,6 +183,10 @@ int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, if (cfg->category != category) continue; + if (cfg->feature_mask && + bitmap_subset(&cfg->feature_mask, &ndev->feature_mask, AIE2_FEATURE_MAX)) + continue; + value = val ? *val : cfg->value; ret = aie2_set_runtime_cfg(ndev, cfg->type, value); if (ret) { @@ -932,6 +936,25 @@ static int aie2_get_telemetry(struct amdxdna_client *client, return 0; } +static int aie2_get_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_attribute_state state = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + if (args->param == DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE) + state.state = ndev->force_preempt_enabled; + else if (args->param == DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE) + state.state = ndev->frame_boundary_preempt; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &state, sizeof(state))) + return -EFAULT; + + return 0; +} + static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { struct amdxdna_dev *xdna = client->xdna; @@ -972,6 +995,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case 
DRM_AMDXDNA_QUERY_RESOURCE_INFO: ret = aie2_query_resource_info(client, args); break; + case DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE: + case DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE: + ret = aie2_get_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; @@ -1078,6 +1105,38 @@ static int aie2_set_power_mode(struct amdxdna_client *client, return aie2_pm_set_mode(xdna->dev_handle, power_mode); } +static int aie2_set_preempt_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle; + struct amdxdna_drm_attribute_state state; + u32 val; + int ret; + + if (copy_from_user(&state, u64_to_user_ptr(args->buffer), sizeof(state))) + return -EFAULT; + + if (state.state > 1) + return -EINVAL; + + if (XDNA_MBZ_DBG(client->xdna, state.pad, sizeof(state.pad))) + return -EINVAL; + + if (args->param == DRM_AMDXDNA_SET_FORCE_PREEMPT) { + ndev->force_preempt_enabled = state.state; + } else if (args->param == DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT) { + val = state.state; + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, + &val); + if (ret) + return ret; + + ndev->frame_boundary_preempt = state.state; + } + + return 0; +} + static int aie2_set_state(struct amdxdna_client *client, struct amdxdna_drm_set_state *args) { @@ -1095,6 +1154,10 @@ static int aie2_set_state(struct amdxdna_client *client, case DRM_AMDXDNA_SET_POWER_MODE: ret = aie2_set_power_mode(client, args); break; + case DRM_AMDXDNA_SET_FORCE_PREEMPT: + case DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT: + ret = aie2_set_preempt_state(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index 9793cd1e0c55..a5f9c42155d1 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -110,12 
+110,15 @@ struct aie_metadata { enum rt_config_category { AIE2_RT_CFG_INIT, AIE2_RT_CFG_CLK_GATING, + AIE2_RT_CFG_FORCE_PREEMPT, + AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, }; struct rt_config { u32 type; u32 value; u32 category; + unsigned long feature_mask; }; struct dpm_clk_freq { @@ -164,6 +167,8 @@ struct aie2_exec_msg_ops { void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); + int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); u32 (*get_chain_msg_op)(u32 cmd_op); }; @@ -197,6 +202,8 @@ struct amdxdna_dev_hdl { u32 hclk_freq; u32 max_tops; u32 curr_tops; + u32 force_preempt_enabled; + u32 frame_boundary_preempt; /* Mailbox and the management channel */ struct mailbox *mbox; @@ -223,6 +230,7 @@ struct aie2_hw_ops { enum aie2_fw_feature { AIE2_NPU_COMMAND, + AIE2_PREEMPT, AIE2_FEATURE_MAX }; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index d02fb32499fa..b6151244d64f 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -16,6 +16,8 @@ enum ert_cmd_opcode { ERT_START_CU = 0, ERT_CMD_CHAIN = 19, ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PREEMPT_ELF = 22, ERT_INVALID_CMD = ~0U, }; @@ -55,6 +57,21 @@ struct amdxdna_cmd_chain { u64 data[] __counted_by(command_count); }; +/* + * Interpretation of the beginning of data payload for ERT_START_NPU_PREEMPT in + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. 
+ */ +struct amdxdna_cmd_preempt_data { + u64 inst_buf; /* instruction buffer address */ + u64 save_buf; /* save buffer address */ + u64 restore_buf; /* restore buffer address */ + u32 inst_size; /* size of instruction buffer in bytes */ + u32 save_size; /* size of save buffer in bytes */ + u32 restore_size; /* size of restore buffer in bytes */ + u32 inst_prop_cnt; /* properties count */ + u32 prop_args[]; /* properties and regular kernel arguments */ +}; + /* Exec buffer command header format */ #define AMDXDNA_CMD_STATE GENMASK(3, 0) #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 7590265d4485..1973ab67721b 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -31,9 +31,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); * 0.3: Support firmware debug buffer * 0.4: Support getting resource information * 0.5: Support getting telemetry data + * 0.6: Support preemption */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 5 +#define AMDXDNA_DRIVER_MINOR 6 /* * Bind the driver base on (vendor_id, device_id) pair and later use the diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index d90777275a9f..986a5f28ba24 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -64,10 +64,13 @@ const struct rt_config npu4_default_rt_cfg[] = { { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */ + { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT }, + { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT }, { 0 }, }; @@ -85,6 +88,7 @@ 
const struct dpm_clk_freq npu4_dpm_clk_table[] = { const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { { .feature = AIE2_NPU_COMMAND, .min_minor = 15 }, + { .feature = AIE2_PREEMPT, .min_minor = 12 }, { 0 } }; diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 8ad254bc35a5..62c917fd4f7b 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -443,7 +443,9 @@ enum amdxdna_drm_get_param { DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, DRM_AMDXDNA_GET_POWER_MODE, DRM_AMDXDNA_QUERY_TELEMETRY, - DRM_AMDXDNA_QUERY_RESOURCE_INFO = 12, + DRM_AMDXDNA_GET_FORCE_PREEMPT_STATE, + DRM_AMDXDNA_QUERY_RESOURCE_INFO, + DRM_AMDXDNA_GET_FRAME_BOUNDARY_PREEMPT_STATE, }; /** @@ -462,6 +464,16 @@ struct amdxdna_drm_get_resource_info { __u64 npu_task_curr; }; +/** + * struct amdxdna_drm_attribute_state - State of an attribute + */ +struct amdxdna_drm_attribute_state { + /** @state: enabled or disabled */ + __u8 state; + /** @pad: MBZ */ + __u8 pad[7]; +}; + /** * struct amdxdna_drm_query_telemetry_header - Telemetry data header */ @@ -613,6 +625,8 @@ enum amdxdna_drm_set_param { DRM_AMDXDNA_SET_POWER_MODE, DRM_AMDXDNA_WRITE_AIE_MEM, DRM_AMDXDNA_WRITE_AIE_REG, + DRM_AMDXDNA_SET_FORCE_PREEMPT, + DRM_AMDXDNA_SET_FRAME_BOUNDARY_PREEMPT, }; /** -- cgit v1.2.3 From b6fa6100cec0287856ec1b363b0a962a9be90e6c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Oct 2025 22:41:51 -0700 Subject: drm/panfrost: fix UAPI kernel-doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix all kernel-doc warnings in include/uapi/drm/panfrost_drm.h. This mostly means modifying existing comments to conform to kernel-doc format, but there are also some additions of missing kernel-doc comments and changing non-kernel-doc comments to use "/*" to begin them. 
Warning: panfrost_drm.h:83 struct member 'jc' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'in_syncs' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'in_sync_count' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'out_sync' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'bo_handles' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'bo_handle_count' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'requirements' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'jm_ctx_handle' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:83 struct member 'pad' not described in 'drm_panfrost_submit' Warning: panfrost_drm.h:116 Incorrect use of kernel-doc format: * Returned offset for the BO in the GPU address space. This offset Warning: panfrost_drm.h:124 struct member 'size' not described in 'drm_panfrost_create_bo' Warning: panfrost_drm.h:124 struct member 'flags' not described in 'drm_panfrost_create_bo' Warning: panfrost_drm.h:124 struct member 'handle' not described in 'drm_panfrost_create_bo' Warning: panfrost_drm.h:124 struct member 'pad' not described in 'drm_panfrost_create_bo' Warning: panfrost_drm.h:124 struct member 'nonzero' not described in 'drm_panfrost_create_bo' Warning: panfrost_drm.h:143 struct member 'handle' not described in 'drm_panfrost_mmap_bo' Warning: panfrost_drm.h:143 struct member 'flags' not described in 'drm_panfrost_mmap_bo' Warning: panfrost_drm.h:143 struct member 'offset' not described in 'drm_panfrost_mmap_bo' Signed-off-by: Randy Dunlap Reviewed-by: Steven Price Reviewed-by: Adrián Larumbe Signed-off-by: Steven Price Link: https://patch.msgid.link/20251031054152.1406764-1-rdunlap@infradead.org --- include/uapi/drm/panfrost_drm.h | 118 ++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 
36 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e8b47c9f6976..1956431bb391 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -54,32 +54,46 @@ extern "C" { * This asks the kernel to have the GPU execute a render command list. */ struct drm_panfrost_submit { - - /** Address to GPU mapping of job descriptor */ + /** + * @jc: Address to GPU mapping of job descriptor + */ __u64 jc; - - /** An optional array of sync objects to wait on before starting this job. */ + /** + * @in_syncs: An optional array of sync objects to wait on + * before starting this job. + */ __u64 in_syncs; - - /** Number of sync objects to wait on before starting this job. */ + /** + * @in_sync_count: Number of sync objects to wait on before + * starting this job. + */ __u32 in_sync_count; - - /** An optional sync object to place the completion fence in. */ + /** + * @out_sync: An optional sync object to place the completion fence in. + */ __u32 out_sync; - - /** Pointer to a u32 array of the BOs that are referenced by the job. */ + /** + * @bo_handles: Pointer to a u32 array of the BOs that are + * referenced by the job. + */ __u64 bo_handles; - - /** Number of BO handles passed in (size is that times 4). */ + /** + * @bo_handle_count: Number of BO handles passed in (size is + * that times 4). + */ __u32 bo_handle_count; - - /** A combination of PANFROST_JD_REQ_* */ + /** + * @requirements: A combination of PANFROST_JD_REQ_* + */ __u32 requirements; - - /** JM context handle. Zero if you want to use the default context. */ + /** + * @jm_ctx_handle: JM context handle. Zero if you want to use the + * default context. + */ __u32 jm_ctx_handle; - - /** Padding field. MBZ. */ + /** + * @pad: Padding field. Must be zero. + */ __u32 pad; }; @@ -92,9 +106,18 @@ struct drm_panfrost_submit { * completed. */ struct drm_panfrost_wait_bo { + /** + * @handle: Handle for the object to wait for. 
+ */ __u32 handle; + /** + * @pad: Padding, must be zero-filled. + */ __u32 pad; - __s64 timeout_ns; /* absolute */ + /** + * @timeout_ns: absolute number of nanoseconds to wait. + */ + __s64 timeout_ns; }; /* Valid flags to pass to drm_panfrost_create_bo */ @@ -107,16 +130,26 @@ struct drm_panfrost_wait_bo { * The flags argument is a bit mask of PANFROST_BO_* flags. */ struct drm_panfrost_create_bo { + /** + * @size: size of shmem/BO area to create (bytes) + */ __u32 size; + /** + * @flags: see PANFROST_BO_* flags + */ __u32 flags; - /** Returned GEM handle for the BO. */ + /** + * @handle: Returned GEM handle for the BO. + */ __u32 handle; - /* Pad, must be zero-filled. */ + /** + * @pad: Padding, must be zero-filled. + */ __u32 pad; /** - * Returned offset for the BO in the GPU address space. This offset - * is private to the DRM fd and is valid for the lifetime of the GEM - * handle. + * @offset: Returned offset for the BO in the GPU address space. + * This offset is private to the DRM fd and is valid for the + * lifetime of the GEM handle. * * This offset value will always be nonzero, since various HW * units treat 0 specially. @@ -136,10 +169,17 @@ struct drm_panfrost_create_bo { * used in a future extension. */ struct drm_panfrost_mmap_bo { - /** Handle for the object being mapped. */ + /** + * @handle: Handle for the object being mapped. + */ __u32 handle; + /** + * @flags: currently not used (should be zero) + */ __u32 flags; - /** offset into the drm node to use for subsequent mmap call. */ + /** + * @offset: offset into the drm node to use for subsequent mmap call. + */ __u64 offset; }; @@ -196,7 +236,7 @@ struct drm_panfrost_get_param { __u64 value; }; -/** +/* * Returns the offset for the BO in the GPU address space for this DRM fd. * This is the same value returned by drm_panfrost_create_bo, if that was called * from this DRM fd. 
@@ -244,12 +284,14 @@ struct drm_panfrost_madvise { * struct drm_panfrost_set_label_bo - ioctl argument for labelling Panfrost BOs. */ struct drm_panfrost_set_label_bo { - /** @handle: Handle of the buffer object to label. */ + /** + * @handle: Handle of the buffer object to label. + */ __u32 handle; - - /** @pad: MBZ. */ + /** + * @pad: Must be zero. + */ __u32 pad; - /** * @label: User pointer to a NUL-terminated string * @@ -330,10 +372,13 @@ enum drm_panfrost_jm_ctx_priority { }; struct drm_panfrost_jm_ctx_create { - /** @handle: Handle of the created JM context */ + /** + * @handle: Handle of the created JM context + */ __u32 handle; - - /** @priority: Context priority (see enum drm_panfrost_jm_ctx_priority). */ + /** + * @priority: Context priority (see enum drm_panfrost_jm_ctx_priority). + */ __u32 priority; }; @@ -344,8 +389,9 @@ struct drm_panfrost_jm_ctx_destroy { * Must be a valid context handle returned by DRM_IOCTL_PANTHOR_JM_CTX_CREATE. */ __u32 handle; - - /** @pad: Padding field, MBZ. */ + /** + * @pad: Padding field, must be zero. + */ __u32 pad; }; -- cgit v1.2.3 From cfc27680ee208cdf7a61cda817b4158c4142595f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 14 Nov 2025 17:01:29 -0700 Subject: drm/colorop: Introduce new drm_colorop mode object This patch introduces a new drm_colorop mode object. This object represents color transformations and can be used to define color pipelines. We also introduce the drm_colorop_state here, as well as various helpers and state tracking bits. 
Reviewed-by: Simon Ser Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-5-alex.hung@amd.com --- drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/drm_atomic.c | 69 +++++++++++++++ drivers/gpu/drm/drm_atomic_helper.c | 12 +++ drivers/gpu/drm/drm_atomic_uapi.c | 45 ++++++++++ drivers/gpu/drm/drm_colorop.c | 103 ++++++++++++++++++++++ drivers/gpu/drm/drm_mode_config.c | 7 ++ include/drm/drm_atomic.h | 70 +++++++++++++++ include/drm/drm_atomic_uapi.h | 1 + include/drm/drm_colorop.h | 169 ++++++++++++++++++++++++++++++++++++ include/drm/drm_mode_config.h | 18 ++++ include/drm/drm_plane.h | 8 ++ include/uapi/drm/drm_mode.h | 1 + 12 files changed, 504 insertions(+) create mode 100644 drivers/gpu/drm/drm_colorop.c create mode 100644 include/drm/drm_colorop.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9901534948e5..3d40442d1854 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -41,6 +41,7 @@ drm-y := \ drm_bridge.o \ drm_cache.o \ drm_color_mgmt.o \ + drm_colorop.o \ drm_connector.o \ drm_crtc.o \ drm_displayid.o \ diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index e05820b18832..6438a3938032 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "drm_crtc_internal.h" #include "drm_internal.h" @@ -107,6 +108,7 @@ void drm_atomic_state_default_release(struct drm_atomic_state *state) kfree(state->connectors); kfree(state->crtcs); kfree(state->planes); + kfree(state->colorops); kfree(state->private_objs); } EXPORT_SYMBOL(drm_atomic_state_default_release); @@ -138,6 +140,10 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) sizeof(*state->planes), GFP_KERNEL); if (!state->planes) goto fail; + 
state->colorops = kcalloc(dev->mode_config.num_colorop, + sizeof(*state->colorops), GFP_KERNEL); + if (!state->colorops) + goto fail; /* * Because drm_atomic_state can be committed asynchronously we need our @@ -251,6 +257,20 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->planes[i].new_state = NULL; } + for (i = 0; i < config->num_colorop; i++) { + struct drm_colorop *colorop = state->colorops[i].ptr; + + if (!colorop) + continue; + + drm_colorop_atomic_destroy_state(colorop, + state->colorops[i].state); + state->colorops[i].ptr = NULL; + state->colorops[i].state = NULL; + state->colorops[i].old_state = NULL; + state->colorops[i].new_state = NULL; + } + for (i = 0; i < state->num_private_objs; i++) { struct drm_private_obj *obj = state->private_objs[i].ptr; @@ -572,6 +592,55 @@ drm_atomic_get_plane_state(struct drm_atomic_state *state, } EXPORT_SYMBOL(drm_atomic_get_plane_state); +/** + * drm_atomic_get_colorop_state - get colorop state + * @state: global atomic state object + * @colorop: colorop to get state object for + * + * This function returns the colorop state for the given colorop, allocating it + * if needed. It will also grab the relevant plane lock to make sure that the + * state is consistent. + * + * Returns: + * + * Either the allocated state or the error code encoded into the pointer. When + * the error is EDEADLK then the w/w mutex code has detected a deadlock and the + * entire atomic sequence must be restarted. All other errors are fatal. 
+ */ +struct drm_colorop_state * +drm_atomic_get_colorop_state(struct drm_atomic_state *state, + struct drm_colorop *colorop) +{ + int ret, index = drm_colorop_index(colorop); + struct drm_colorop_state *colorop_state; + + WARN_ON(!state->acquire_ctx); + + colorop_state = drm_atomic_get_new_colorop_state(state, colorop); + if (colorop_state) + return colorop_state; + + ret = drm_modeset_lock(&colorop->plane->mutex, state->acquire_ctx); + if (ret) + return ERR_PTR(ret); + + colorop_state = drm_atomic_helper_colorop_duplicate_state(colorop); + if (!colorop_state) + return ERR_PTR(-ENOMEM); + + state->colorops[index].state = colorop_state; + state->colorops[index].ptr = colorop; + state->colorops[index].old_state = colorop->state; + state->colorops[index].new_state = colorop_state; + colorop_state->state = state; + + drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d] %p state to %p\n", + colorop->base.id, colorop_state, state); + + return colorop_state; +} +EXPORT_SYMBOL(drm_atomic_get_colorop_state); + static bool plane_switching_crtc(const struct drm_plane_state *old_plane_state, const struct drm_plane_state *new_plane_state) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index e641fcf8c568..10adac9397cf 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3184,6 +3184,8 @@ int drm_atomic_helper_swap_state(struct drm_atomic_state *state, struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_colorop *colorop; + struct drm_colorop_state *old_colorop_state, *new_colorop_state; struct drm_crtc_commit *commit; struct drm_private_obj *obj; struct drm_private_state *old_obj_state, *new_obj_state; @@ -3261,6 +3263,16 @@ int drm_atomic_helper_swap_state(struct drm_atomic_state *state, } } + for_each_oldnew_colorop_in_state(state, colorop, old_colorop_state, new_colorop_state, i) { + 
WARN_ON(colorop->state != old_colorop_state); + + old_colorop_state->state = state; + new_colorop_state->state = NULL; + + state->colorops[i].state = old_colorop_state; + colorop->state = new_colorop_state; + } + drm_panic_lock(state->dev, flags); for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { WARN_ON(plane->state != old_plane_state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index b2cb5ae5a139..148f11895b9e 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -648,6 +649,26 @@ drm_atomic_plane_get_property(struct drm_plane *plane, return 0; } +static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, + struct drm_colorop_state *state, + struct drm_file *file_priv, + struct drm_property *property, + uint64_t val) +{ + drm_dbg_atomic(colorop->dev, + "[COLOROP:%d] unknown property [PROP:%d:%s]]\n", + colorop->base.id, property->base.id, property->name); + return -EINVAL; +} + +static int +drm_atomic_colorop_get_property(struct drm_colorop *colorop, + const struct drm_colorop_state *state, + struct drm_property *property, uint64_t *val) +{ + return -EINVAL; +} + static int drm_atomic_set_writeback_fb_for_connector( struct drm_connector_state *conn_state, struct drm_framebuffer *fb) @@ -914,6 +935,15 @@ int drm_atomic_get_property(struct drm_mode_object *obj, plane->state, property, val); break; } + case DRM_MODE_OBJECT_COLOROP: { + struct drm_colorop *colorop = obj_to_colorop(obj); + + if (colorop->plane) + WARN_ON(!drm_modeset_is_locked(&colorop->plane->mutex)); + + ret = drm_atomic_colorop_get_property(colorop, colorop->state, property, val); + break; + } default: drm_dbg_atomic(dev, "[OBJECT:%d] has no properties\n", obj->id); ret = -EINVAL; @@ -1111,6 +1141,21 @@ int drm_atomic_set_property(struct drm_atomic_state *state, ret = drm_atomic_plane_set_property(plane, 
plane_state, file_priv, prop, prop_value); + + break; + } + case DRM_MODE_OBJECT_COLOROP: { + struct drm_colorop *colorop = obj_to_colorop(obj); + struct drm_colorop_state *colorop_state; + + colorop_state = drm_atomic_get_colorop_state(state, colorop); + if (IS_ERR(colorop_state)) { + ret = PTR_ERR(colorop_state); + break; + } + + ret = drm_atomic_colorop_set_property(colorop, colorop_state, + file_priv, prop, prop_value); break; } default: diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c new file mode 100644 index 000000000000..59af7ac888d6 --- /dev/null +++ b/drivers/gpu/drm/drm_colorop.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include +#include +#include +#include + +#include "drm_crtc_internal.h" + +static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, + struct drm_colorop_state *state) +{ + memcpy(state, colorop->state, sizeof(*state)); +} + +struct drm_colorop_state * +drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop) +{ + struct drm_colorop_state *state; + + if (WARN_ON(!colorop->state)) + return NULL; + + state = kmalloc(sizeof(*state), GFP_KERNEL); + if (state) + __drm_atomic_helper_colorop_duplicate_state(colorop, state); + + return state; +} + +void drm_colorop_atomic_destroy_state(struct drm_colorop *colorop, + struct drm_colorop_state *state) +{ + kfree(state); +} + +/** + * __drm_colorop_state_reset - resets colorop state to default values + * @colorop_state: atomic colorop state, must not be NULL + * @colorop: colorop object, must not be NULL + * + * Initializes the newly allocated @colorop_state with default + * values. This is useful for drivers that subclass the colorop state. + */ +static void __drm_colorop_state_reset(struct drm_colorop_state *colorop_state, + struct drm_colorop *colorop) +{ + colorop_state->colorop = colorop; +} + +/** + * __drm_colorop_reset - reset state on colorop + * @colorop: drm colorop + * @colorop_state: colorop state to assign + * + * Initializes the newly allocated @colorop_state and assigns it to + * the &drm_colorop->state pointer of @colorop, usually required when + * initializing the drivers or when called from the &drm_colorop_funcs.reset + * hook. + * + * This is useful for drivers that subclass the colorop state. 
+ */ +static void __drm_colorop_reset(struct drm_colorop *colorop, + struct drm_colorop_state *colorop_state) +{ + if (colorop_state) + __drm_colorop_state_reset(colorop_state, colorop); + + colorop->state = colorop_state; +} + +void drm_colorop_reset(struct drm_colorop *colorop) +{ + kfree(colorop->state); + colorop->state = kzalloc(sizeof(*colorop->state), GFP_KERNEL); + + if (colorop->state) + __drm_colorop_reset(colorop, colorop->state); +} diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 25f376869b3a..d12db9b0bab8 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "drm_crtc_internal.h" @@ -192,11 +193,15 @@ int drm_mode_getresources(struct drm_device *dev, void *data, void drm_mode_config_reset(struct drm_device *dev) { struct drm_crtc *crtc; + struct drm_colorop *colorop; struct drm_plane *plane; struct drm_encoder *encoder; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; + drm_for_each_colorop(colorop, dev) + drm_colorop_reset(colorop); + drm_for_each_plane(plane, dev) if (plane->funcs->reset) plane->funcs->reset(plane); @@ -437,6 +442,7 @@ int drmm_mode_config_init(struct drm_device *dev) INIT_LIST_HEAD(&dev->mode_config.property_list); INIT_LIST_HEAD(&dev->mode_config.property_blob_list); INIT_LIST_HEAD(&dev->mode_config.plane_list); + INIT_LIST_HEAD(&dev->mode_config.colorop_list); INIT_LIST_HEAD(&dev->mode_config.privobj_list); idr_init_base(&dev->mode_config.object_idr, 1); idr_init_base(&dev->mode_config.tile_idr, 1); @@ -458,6 +464,7 @@ int drmm_mode_config_init(struct drm_device *dev) dev->mode_config.num_crtc = 0; dev->mode_config.num_encoder = 0; dev->mode_config.num_total_plane = 0; + dev->mode_config.num_colorop = 0; if (IS_ENABLED(CONFIG_LOCKDEP)) { struct drm_modeset_acquire_ctx modeset_ctx; diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 
2e433d44658d..895529337d7e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -30,6 +30,7 @@ #include #include +#include /** * struct drm_crtc_commit - track modeset commits on a CRTC @@ -157,6 +158,11 @@ struct drm_crtc_commit { bool abort_completion; }; +struct __drm_colorops_state { + struct drm_colorop *ptr; + struct drm_colorop_state *state, *old_state, *new_state; +}; + struct __drm_planes_state { struct drm_plane *ptr; @@ -531,6 +537,14 @@ struct drm_atomic_state { */ bool checked : 1; + /** + * @colorops: + * + * Pointer to array of @drm_colorop and @drm_colorop_state part of this + * update. + */ + struct __drm_colorops_state *colorops; + /** * @planes: * @@ -672,6 +686,9 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state, struct drm_plane_state * __must_check drm_atomic_get_plane_state(struct drm_atomic_state *state, struct drm_plane *plane); +struct drm_colorop_state * +drm_atomic_get_colorop_state(struct drm_atomic_state *state, + struct drm_colorop *colorop); struct drm_connector_state * __must_check drm_atomic_get_connector_state(struct drm_atomic_state *state, struct drm_connector *connector); @@ -768,6 +785,36 @@ drm_atomic_get_new_plane_state(const struct drm_atomic_state *state, return state->planes[drm_plane_index(plane)].new_state; } +/** + * drm_atomic_get_old_colorop_state - get colorop state, if it exists + * @state: global atomic state object + * @colorop: colorop to grab + * + * This function returns the old colorop state for the given colorop, or + * NULL if the colorop is not part of the global atomic state. 
+ */ +static inline struct drm_colorop_state * +drm_atomic_get_old_colorop_state(struct drm_atomic_state *state, + struct drm_colorop *colorop) +{ + return state->colorops[drm_colorop_index(colorop)].old_state; +} + +/** + * drm_atomic_get_new_colorop_state - get colorop state, if it exists + * @state: global atomic state object + * @colorop: colorop to grab + * + * This function returns the new colorop state for the given colorop, or + * NULL if the colorop is not part of the global atomic state. + */ +static inline struct drm_colorop_state * +drm_atomic_get_new_colorop_state(struct drm_atomic_state *state, + struct drm_colorop *colorop) +{ + return state->colorops[drm_colorop_index(colorop)].new_state; +} + /** * drm_atomic_get_old_connector_state - get connector state, if it exists * @state: global atomic state object @@ -998,6 +1045,29 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p); (new_crtc_state) = (__state)->crtcs[__i].new_state, \ (void)(new_crtc_state) /* Only to avoid unused-but-set-variable warning */, 1)) +/** + * for_each_oldnew_colorop_in_state - iterate over all colorops in an atomic update + * @__state: &struct drm_atomic_state pointer + * @colorop: &struct drm_colorop iteration cursor + * @old_colorop_state: &struct drm_colorop_state iteration cursor for the old state + * @new_colorop_state: &struct drm_colorop_state iteration cursor for the new state + * @__i: int iteration cursor, for macro-internal use + * + * This iterates over all colorops in an atomic update, tracking both old and + * new state. This is useful in places where the state delta needs to be + * considered, for example in atomic check functions. 
+ */ +#define for_each_oldnew_colorop_in_state(__state, colorop, old_colorop_state, \ + new_colorop_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->dev->mode_config.num_colorop; \ + (__i)++) \ + for_each_if ((__state)->colorops[__i].ptr && \ + ((colorop) = (__state)->colorops[__i].ptr, \ + (void)(colorop) /* Only to avoid unused-but-set-variable warning */, \ + (old_colorop_state) = (__state)->colorops[__i].old_state,\ + (new_colorop_state) = (__state)->colorops[__i].new_state, 1)) + /** * for_each_oldnew_plane_in_state - iterate over all planes in an atomic update * @__state: &struct drm_atomic_state pointer diff --git a/include/drm/drm_atomic_uapi.h b/include/drm/drm_atomic_uapi.h index 4c6d39d7bdb2..70a115d523cd 100644 --- a/include/drm/drm_atomic_uapi.h +++ b/include/drm/drm_atomic_uapi.h @@ -37,6 +37,7 @@ struct drm_crtc; struct drm_connector_state; struct dma_fence; struct drm_framebuffer; +struct drm_colorop; int __must_check drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state, diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h new file mode 100644 index 000000000000..28bb7091ef1f --- /dev/null +++ b/include/drm/drm_colorop.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DRM_COLOROP_H__ +#define __DRM_COLOROP_H__ + +#include +#include +#include + +/** + * struct drm_colorop_state - mutable colorop state + */ +struct drm_colorop_state { + /** @colorop: backpointer to the colorop */ + struct drm_colorop *colorop; + + /* + * Color properties + * + * The following fields are not always valid, their usage depends + * on the colorop type. See their associated comment for more + * information. + */ + + /** @state: backpointer to global drm_atomic_state */ + struct drm_atomic_state *state; +}; + +/** + * struct drm_colorop - DRM color operation control structure + * + * A colorop represents one color operation. They can be chained via + * the 'next' pointer to build a color pipeline. + * + * Since colorops cannot stand-alone and are used to describe colorop + * operations on a plane they don't have their own locking mechanism but + * are locked and programmed along with their associated &drm_plane. + * + */ +struct drm_colorop { + /** @dev: parent DRM device */ + struct drm_device *dev; + + /** + * @head: + * + * List of all colorops on @dev, linked from &drm_mode_config.colorop_list. + * Invariant over the lifetime of @dev and therefore does not need + * locking. + */ + struct list_head head; + + /** + * @index: Position inside the mode_config.list, can be used as an array + * index. It is invariant over the lifetime of the colorop. 
+ */ + unsigned int index; + + /** @base: base mode object */ + struct drm_mode_object base; + + /** + * @plane: + * + * The plane on which the colorop sits. A drm_colorop is always unique + * to a plane. + */ + struct drm_plane *plane; + + /** + * @state: + * + * Current atomic state for this colorop. + * + * This is protected by the &drm_plane.mutex of @plane. Note that nonblocking atomic commits + * access the current colorop state without taking locks. + */ + struct drm_colorop_state *state; + + /* + * Color properties + * + * The following fields are not always valid, their usage depends + * on the colorop type. See their associated comment for more + * information. + */ + + /** @properties: property tracking for this colorop */ + struct drm_object_properties properties; + +}; + +#define obj_to_colorop(x) container_of(x, struct drm_colorop, base) + +/** + * drm_colorop_find - look up a Colorop object from its ID + * @dev: DRM device + * @file_priv: drm file to check for lease against. + * @id: &drm_mode_object ID + * + * This can be used to look up a Colorop from its userspace ID. Only used by + * drivers for legacy IOCTLs and interface, nowadays extensions to the KMS + * userspace interface should be done using &drm_property. + */ +static inline struct drm_colorop *drm_colorop_find(struct drm_device *dev, + struct drm_file *file_priv, + uint32_t id) +{ + struct drm_mode_object *mo; + + mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_COLOROP); + return mo ? obj_to_colorop(mo) : NULL; +} + +struct drm_colorop_state * +drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop); + +void drm_colorop_atomic_destroy_state(struct drm_colorop *colorop, + struct drm_colorop_state *state); + +/** + * drm_colorop_reset - reset colorop atomic state + * @colorop: drm colorop + * + * Resets the atomic state for @colorop by freeing the state pointer (which might + * be NULL, e.g. at driver load time) and allocating a new empty state object. 
+ */ +void drm_colorop_reset(struct drm_colorop *colorop); + +/** + * drm_colorop_index - find the index of a registered colorop + * @colorop: colorop to find index for + * + * Given a registered colorop, return the index of that colorop within a DRM + * device's list of colorops. + */ +static inline unsigned int drm_colorop_index(const struct drm_colorop *colorop) +{ + return colorop->index; +} + +#define drm_for_each_colorop(colorop, dev) \ + list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head) + +#endif /* __DRM_COLOROP_H__ */ diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 2e848b816218..895fb820dba0 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -500,6 +500,24 @@ struct drm_mode_config { */ struct raw_spinlock panic_lock; + /** + * @num_colorop: + * + * Number of colorop objects on this device. + * This is invariant over the lifetime of a device and hence doesn't + * need any locks. + */ + int num_colorop; + + /** + * @colorop_list: + * + * List of colorop objects linked with &drm_colorop.head. This is + * invariant over the lifetime of a device and hence doesn't need any + * locks. + */ + struct list_head colorop_list; + /** * @num_crtc: * diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 01479dd94e76..61fedd4e253c 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -243,6 +243,14 @@ struct drm_plane_state { */ enum drm_scaling_filter scaling_filter; + /** + * @color_pipeline: + * + * The first colorop of the active color pipeline, or NULL, if no + * color pipeline is active. + */ + struct drm_colorop *color_pipeline; + /** * @commit: Tracks the pending commit to prevent use-after-free conditions, * and for async plane updates. 
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 1e0e02a79b5c..ec27125c2928 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -629,6 +629,7 @@ struct drm_mode_connector_set_property { #define DRM_MODE_OBJECT_FB 0xfbfbfbfb #define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb #define DRM_MODE_OBJECT_PLANE 0xeeeeeeee +#define DRM_MODE_OBJECT_COLOROP 0xfafafafa #define DRM_MODE_OBJECT_ANY 0 struct drm_mode_obj_get_properties { -- cgit v1.2.3 From 84423e561208054f872b3ca66e3e99a10d06c0ac Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 14 Nov 2025 17:01:30 -0700 Subject: drm/colorop: Add TYPE property Add a read-only TYPE property. The TYPE specifies the colorop type, such as enumerated curve, 1D LUT, CTM, 3D LUT, PWL LUT, etc. For now we're only introducing an enumerated 1D LUT type to illustrate the concept. Reviewed-by: Simon Ser Reviewed-by: Louis Chauvet Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-6-alex.hung@amd.com --- drivers/gpu/drm/drm_atomic.c | 4 ++-- drivers/gpu/drm/drm_atomic_uapi.c | 7 ++++++- drivers/gpu/drm/drm_colorop.c | 12 ++++++++++++ include/drm/drm_colorop.h | 24 ++++++++++++++++++++++++ include/uapi/drm/drm_mode.h | 19 +++++++++++++++++++ 5 files changed, 63 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 6438a3938032..99545184960f 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -634,8 +634,8 @@ drm_atomic_get_colorop_state(struct drm_atomic_state *state, state->colorops[index].new_state = colorop_state; colorop_state->state = state; - drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d] %p state to %p\n", - colorop->base.id, colorop_state, state); + drm_dbg_atomic(colorop->dev, "Added [COLOROP:%d:%d] %p state to 
%p\n", + colorop->base.id, colorop->type, colorop_state, state); return colorop_state; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 148f11895b9e..55b3046c5f1c 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -666,7 +666,12 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, const struct drm_colorop_state *state, struct drm_property *property, uint64_t *val) { - return -EINVAL; + if (property == colorop->type_property) + *val = colorop->type; + else + return -EINVAL; + + return 0; } static int drm_atomic_set_writeback_fb_for_connector( diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 59af7ac888d6..d0e839a1df7c 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -101,3 +101,15 @@ void drm_colorop_reset(struct drm_colorop *colorop) if (colorop->state) __drm_colorop_reset(colorop, colorop->state); } + +static const char * const colorop_type_name[] = { + [DRM_COLOROP_1D_CURVE] = "1D Curve", +}; + +const char *drm_get_colorop_type_name(enum drm_colorop_type type) +{ + if (WARN_ON(type >= ARRAY_SIZE(colorop_type_name))) + return "unknown"; + + return colorop_type_name[type]; +} diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 28bb7091ef1f..0bda70f4e82a 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -112,6 +112,21 @@ struct drm_colorop { /** @properties: property tracking for this colorop */ struct drm_object_properties properties; + /** + * @type: + * + * Read-only + * Type of color operation + */ + enum drm_colorop_type type; + + /** + * @type_property: + * + * Read-only "TYPE" property for specifying the type of + * this color operation. The type is enum drm_colorop_type. 
+ */ + struct drm_property *type_property; }; #define obj_to_colorop(x) container_of(x, struct drm_colorop, base) @@ -166,4 +181,13 @@ static inline unsigned int drm_colorop_index(const struct drm_colorop *colorop) #define drm_for_each_colorop(colorop, dev) \ list_for_each_entry(colorop, &(dev)->mode_config.colorop_list, head) +/** + * drm_get_colorop_type_name - return a string for colorop type + * @type: colorop type to compute name of + * + * In contrast to the other drm_get_*_name functions this one here returns a + * const pointer and hence is threadsafe. + */ +const char *drm_get_colorop_type_name(enum drm_colorop_type type); + #endif /* __DRM_COLOROP_H__ */ diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index ec27125c2928..c419f93eb94d 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -858,6 +858,25 @@ struct drm_color_lut { __u16 reserved; }; +/** + * enum drm_colorop_type - Type of color operation + * + * drm_colorops can be of many different types. Each type behaves differently + * and defines a different set of properties. This enum defines all types and + * gives a high-level description. + */ +enum drm_colorop_type { + /** + * @DRM_COLOROP_1D_CURVE: + * + * enum string "1D Curve" + * + * A 1D curve that is being applied to all color channels. The + * curve is specified via the CURVE_1D_TYPE colorop property. + */ + DRM_COLOROP_1D_CURVE +}; + /** * struct drm_plane_size_hint - Plane size hints * @width: The width of the plane in pixel -- cgit v1.2.3 From 179ab8e7d7b378f1cd3ff10113458133b73dc52e Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 14 Nov 2025 17:01:36 -0700 Subject: drm/colorop: Introduce DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE With the introduction of the pre-blending color pipeline we can no longer have color operations that don't have a clear position in the color pipeline. We deprecate all existing plane properties. 
For upstream drivers those are: - COLOR_ENCODING - COLOR_RANGE Drivers are expected to ignore these properties when programming the HW. DRM clients that register with DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE will not be allowed to set the COLOR_ENCODING and COLOR_RANGE properties. Setting of the COLOR_PIPELINE plane property or drm_colorop properties is only allowed for userspace that sets this client cap. Reviewed-by: Simon Ser Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-12-alex.hung@amd.com --- drivers/gpu/drm/drm_connector.c | 1 + drivers/gpu/drm/drm_crtc_internal.h | 1 + drivers/gpu/drm/drm_ioctl.c | 7 +++++++ drivers/gpu/drm/drm_mode_object.c | 18 ++++++++++++++++++ include/drm/drm_file.h | 7 +++++++ include/uapi/drm/drm.h | 15 +++++++++++++++ 6 files changed, 49 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 272d6254ea47..4d6dc9ebfdb5 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -3439,6 +3439,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, * properties reflect the latest status. 
*/ ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, + file_priv->plane_color_pipeline, (uint32_t __user *)(unsigned long)(out_resp->props_ptr), (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), &out_resp->count_props); diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 89706aa8232f..c09409229644 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -163,6 +163,7 @@ struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, void drm_mode_object_unregister(struct drm_device *dev, struct drm_mode_object *object); int drm_mode_object_get_properties(struct drm_mode_object *obj, bool atomic, + bool plane_color_pipeline, uint32_t __user *prop_ptr, uint64_t __user *prop_values, uint32_t *arg_count_props); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index d8a24875a7ba..ff193155129e 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -373,6 +373,13 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->supports_virtualized_cursor_plane = req->value; break; + case DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE: + if (!file_priv->atomic) + return -EINVAL; + if (req->value > 1) + return -EINVAL; + file_priv->plane_color_pipeline = req->value; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index e943205a2394..b45d501b10c8 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "drm_crtc_internal.h" @@ -386,6 +387,7 @@ EXPORT_SYMBOL(drm_object_property_get_default_value); /* helper for getconnector and getproperties ioctls */ int drm_mode_object_get_properties(struct drm_mode_object *obj, bool atomic, + bool plane_color_pipeline, uint32_t __user *prop_ptr, uint64_t __user 
*prop_values, uint32_t *arg_count_props) @@ -399,6 +401,21 @@ int drm_mode_object_get_properties(struct drm_mode_object *obj, bool atomic, if ((prop->flags & DRM_MODE_PROP_ATOMIC) && !atomic) continue; + if (plane_color_pipeline && obj->type == DRM_MODE_OBJECT_PLANE) { + struct drm_plane *plane = obj_to_plane(obj); + + if (prop == plane->color_encoding_property || + prop == plane->color_range_property) + continue; + } + + if (!plane_color_pipeline && obj->type == DRM_MODE_OBJECT_PLANE) { + struct drm_plane *plane = obj_to_plane(obj); + + if (prop == plane->color_pipeline_property) + continue; + } + if (*arg_count_props > count) { ret = __drm_object_property_get_value(obj, prop, &val); if (ret) @@ -457,6 +474,7 @@ int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data, } ret = drm_mode_object_get_properties(obj, file_priv->atomic, + file_priv->plane_color_pipeline, (uint32_t __user *)(unsigned long)(arg->props_ptr), (uint64_t __user *)(unsigned long)(arg->prop_values_ptr), &arg->count_props); diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 115763799625..1a3018e4a537 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -206,6 +206,13 @@ struct drm_file { */ bool writeback_connectors; + /** + * @plane_color_pipeline: + * + * True if client understands plane color pipelines + */ + bool plane_color_pipeline; + /** * @was_master: * diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 3cd5cf15e3c9..27cc159c1d27 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -906,6 +906,21 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 +/** + * DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE + * + * If set to 1 the DRM core will allow setting the COLOR_PIPELINE + * property on a &drm_plane, as well as drm_colorop properties. 
+ * + * Setting of these plane properties will be rejected when this client + * cap is set: + * - COLOR_ENCODING + * - COLOR_RANGE + * + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + */ +#define DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE 7 + /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; -- cgit v1.2.3 From e5719e7f19009d4fbedf685fc22eec9cd8de154f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 14 Nov 2025 17:01:43 -0700 Subject: drm/colorop: Add 3x4 CTM type This type is used to support a 3x4 matrix in colorops. A 3x4 matrix uses the last column as a "bias" column. Some HW exposes support for 3x4. The calculation looks like: out matrix in |R| |0 1 2 3 | | R | |G| = |4 5 6 7 | x | G | |B| |8 9 10 11| | B | |1.0| This is also the first colorop where we need a blob property to program the property. For that we'll introduce a new DATA property that can be used by all colorop TYPEs requiring a blob. The way a DATA blob is read depends on the TYPE of the colorop. We only create the DATA property for property types that need it. 
Reviewed-by: Simon Ser Reviewed-by: Louis Chauvet Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-19-alex.hung@amd.com --- drivers/gpu/drm/drm_atomic.c | 3 +++ drivers/gpu/drm/drm_atomic_uapi.c | 31 ++++++++++++++++++++++++++ drivers/gpu/drm/drm_colorop.c | 47 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_colorop.h | 24 ++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 9 -------- include/uapi/drm/drm_mode.h | 32 +++++++++++++++++++++++++- 6 files changed, 136 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 95111f9a8635..60b3a069f4dd 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -794,6 +794,9 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); break; + case DRM_COLOROP_CTM_3X4: + drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0); + break; default: break; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 608b0b571c2e..392198aae072 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -689,6 +689,32 @@ drm_atomic_plane_get_property(struct drm_plane *plane, return 0; } +static int drm_atomic_color_set_data_property(struct drm_colorop *colorop, + struct drm_colorop_state *state, + struct drm_property *property, + uint64_t val) +{ + ssize_t elem_size = -1; + ssize_t size = -1; + bool replaced = false; + + switch (colorop->type) { + case DRM_COLOROP_CTM_3X4: + size = sizeof(struct drm_color_ctm_3x4); + break; + default: + /* should never get here */ + return -EINVAL; + } + + return drm_property_replace_blob_from_id(colorop->dev, + &state->data, + val, + size, + elem_size, + &replaced); +} + static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, struct drm_colorop_state *state, struct drm_file *file_priv, @@ -699,6 +725,9 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, state->bypass = val; } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; + } else if (property == colorop->data_property) { + return drm_atomic_color_set_data_property(colorop, state, + property, val); } else { drm_dbg_atomic(colorop->dev, "[COLOROP:%d:%d] unknown property [PROP:%d:%s]\n", @@ -721,6 +750,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; else if (property == colorop->curve_1d_type_property) *val = state->curve_1d_type; + else if (property == colorop->data_property) + *val = (state->data) ? 
state->data->base.id : 0; else return -EINVAL; diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index b75ad8544315..c68b85a7b261 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -64,6 +64,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, + { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}, }; static const char * const colorop_curve_1d_type_names[] = { @@ -147,6 +148,11 @@ void drm_colorop_cleanup(struct drm_colorop *colorop) list_del(&colorop->head); config->num_colorop--; + if (colorop->state && colorop->state->data) { + drm_property_blob_put(colorop->state->data); + colorop->state->data = NULL; + } + kfree(colorop->state); } EXPORT_SYMBOL(drm_colorop_cleanup); @@ -236,11 +242,51 @@ int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop * } EXPORT_SYMBOL(drm_plane_colorop_curve_1d_init); +static int drm_colorop_create_data_prop(struct drm_device *dev, struct drm_colorop *colorop) +{ + struct drm_property *prop; + + /* data */ + prop = drm_property_create(dev, DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, + "DATA", 0); + if (!prop) + return -ENOMEM; + + colorop->data_property = prop; + drm_object_attach_property(&colorop->base, + colorop->data_property, + 0); + + return 0; +} + +int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane) +{ + int ret; + + ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_CTM_3X4); + if (ret) + return ret; + + ret = drm_colorop_create_data_prop(dev, colorop); + if (ret) + return ret; + + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_plane_colorop_ctm_3x4_init); + static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, struct drm_colorop_state *state) { memcpy(state, colorop->state, sizeof(*state)); + if (state->data) + drm_property_blob_get(state->data); + state->bypass = true; } @@ -321,6 
+367,7 @@ void drm_colorop_reset(struct drm_colorop *colorop) static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_CURVE] = "1D Curve", + [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", }; const char *drm_get_colorop_type_name(enum drm_colorop_type type) diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index ba03b35454da..8b5f8aaac2f4 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -97,6 +97,17 @@ struct drm_colorop_state { */ enum drm_colorop_curve_1d_type curve_1d_type; + /** + * @data: + * + * Data blob for any TYPE that requires such a blob. The + * interpretation of the blob is TYPE-specific. + * + * See the &drm_colorop_type documentation for how blob is laid + * out. + */ + struct drm_property_blob *data; + /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; }; @@ -206,6 +217,17 @@ struct drm_colorop { */ struct drm_property *curve_1d_type_property; + /** + * @data_property: + * + * blob property for any TYPE that requires a blob of data, + * such as 1DLUT, CTM, 3DLUT, etc. 
+ * + * The way this blob is interpreted depends on the TYPE of + * this + */ + struct drm_property *data_property; + /** * @next_property: * @@ -242,6 +264,8 @@ void drm_colorop_cleanup(struct drm_colorop *colorop); int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, u64 supported_tfs); +int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane); struct drm_colorop_state * drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 406a42be429b..f80aa4c9d88f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1656,15 +1656,6 @@ struct drm_amdgpu_info_uq_metadata { #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ -/* FIXME wrong namespace! */ -struct drm_color_ctm_3x4 { - /* - * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude - * (not two's complement!) format. - */ - __u64 matrix[12]; -}; - #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index c419f93eb94d..054561022953 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -847,6 +847,20 @@ struct drm_color_ctm { __u64 matrix[9]; }; +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + * + * out matrix in + * |R| |0 1 2 3 | | R | + * |G| = |4 5 6 7 | x | G | + * |B| |8 9 10 11| | B | + * |1.0| + */ + __u64 matrix[12]; +}; + struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and @@ -874,7 +888,23 @@ enum drm_colorop_type { * A 1D curve that is being applied to all color channels. The * curve is specified via the CURVE_1D_TYPE colorop property. 
*/ - DRM_COLOROP_1D_CURVE + DRM_COLOROP_1D_CURVE, + + /** + * @DRM_COLOROP_CTM_3X4: + * + * enum string "3x4 Matrix" + * + * A 3x4 matrix. Its values are specified via the + * &drm_color_ctm_3x4 struct provided via the DATA property. + * + * The DATA blob is a float[12]: + * out matrix in + * | R | | 0 1 2 3 | | R | + * | G | = | 4 5 6 7 | x | G | + * | B | | 8 9 10 11 | | B | + */ + DRM_COLOROP_CTM_3X4, }; /** -- cgit v1.2.3 From 621c45ca12ed9bd5a8ef434925fe51c319c6e28d Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Fri, 14 Nov 2025 17:02:00 -0700 Subject: drm: Add Enhanced LUT precision structure Existing LUT precision structure drm_color_lut has only 16 bit precision. This is not enough for upcoming enhanced hardware and advanced use cases like HDR processing. Hence add a new structure with 32 bit precision values. Signed-off-by: Alex Hung Signed-off-by: Uma Shankar Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-36-alex.hung@amd.com --- drivers/gpu/drm/drm_color_mgmt.c | 43 ++++++++++++++++++++++++++++++++++++++++ include/drm/drm_color_mgmt.h | 13 ++++++++++++ include/uapi/drm/drm_mode.h | 12 ++++++++++++ 3 files changed, 68 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 131c1c9ae92f..c598b99673fc 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -874,3 +874,46 @@ void drm_crtc_fill_palette_8(struct drm_crtc *crtc, drm_crtc_set_lut_func set_pa fill_palette_8(crtc, i, set_palette); } EXPORT_SYMBOL(drm_crtc_fill_palette_8); + +/** + * drm_color_lut32_check - check validity of extended lookup table + * @lut: property blob containing extended LUT to check + * @tests: bitmask of tests to run + * + * Helper to check whether a userspace-provided extended lookup table is valid and + * satisfies hardware requirements. 
Drivers pass a bitmask indicating which of + * the tests in &drm_color_lut_tests should be performed. + * + * Returns 0 on success, -EINVAL on failure. + */ +int drm_color_lut32_check(const struct drm_property_blob *lut, u32 tests) +{ + const struct drm_color_lut32 *entry; + int i; + + if (!lut || !tests) + return 0; + + entry = lut->data; + for (i = 0; i < drm_color_lut32_size(lut); i++) { + if (tests & DRM_COLOR_LUT_EQUAL_CHANNELS) { + if (entry[i].red != entry[i].blue || + entry[i].red != entry[i].green) { + DRM_DEBUG_KMS("All LUT entries must have equal r/g/b\n"); + return -EINVAL; + } + } + + if (i > 0 && tests & DRM_COLOR_LUT_NON_DECREASING) { + if (entry[i].red < entry[i - 1].red || + entry[i].green < entry[i - 1].green || + entry[i].blue < entry[i - 1].blue) { + DRM_DEBUG_KMS("LUT entries must never decrease.\n"); + return -EINVAL; + } + } + } + + return 0; +} +EXPORT_SYMBOL(drm_color_lut32_check); diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index eccb71ab335a..527582c20885 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -72,6 +72,18 @@ static inline int drm_color_lut_size(const struct drm_property_blob *blob) return blob->length / sizeof(struct drm_color_lut); } +/** + * drm_color_lut32_size - calculate the number of entries in the extended LUT + * @blob: blob containing the LUT + * + * Returns: + * The number of entries in the color LUT stored in @blob. 
+ */ +static inline int drm_color_lut32_size(const struct drm_property_blob *blob) +{ + return blob->length / sizeof(struct drm_color_lut32); +} + enum drm_color_encoding { DRM_COLOR_YCBCR_BT601, DRM_COLOR_YCBCR_BT709, @@ -146,4 +158,5 @@ void drm_crtc_load_palette_8(struct drm_crtc *crtc, const struct drm_color_lut * void drm_crtc_fill_palette_332(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette); void drm_crtc_fill_palette_8(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette); +int drm_color_lut32_check(const struct drm_property_blob *lut, u32 tests); #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 054561022953..5e637ec7b64c 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -872,6 +872,18 @@ struct drm_color_lut { __u16 reserved; }; +/* + * struct drm_color_lut32 + * + * 32-bit per channel color LUT entry, similar to drm_color_lut. + */ +struct drm_color_lut32 { + __u32 red; + __u32 green; + __u32 blue; + __u32 reserved; +}; + /** * enum drm_colorop_type - Type of color operation * -- cgit v1.2.3 From 99a4e4f08abe253a7812e4872882a75cecd87703 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 14 Nov 2025 17:02:02 -0700 Subject: drm/colorop: Add 1D Curve Custom LUT type We've previously introduced DRM_COLOROP_1D_CURVE for pre-defined 1D curves. But we also have HW that supports custom curves and userspace needs the ability to pass custom curves, aka LUTs. This patch introduces a new colorop type, called DRM_COLOROP_1D_LUT that provides a SIZE property which is used by a driver to advertise the supported SIZE of the LUT, as well as a DATA property which userspace uses to set the LUT. DATA and size function in the same way as current drm_crtc GAMMA and DEGAMMA LUTs. 
Reviewed-by: Simon Ser Signed-off-by: Alex Hung Co-developed-by: Harry Wentland Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-38-alex.hung@amd.com --- drivers/gpu/drm/drm_atomic.c | 4 ++++ drivers/gpu/drm/drm_atomic_uapi.c | 5 +++++ drivers/gpu/drm/drm_colorop.c | 43 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_colorop.h | 16 +++++++++++++++ include/uapi/drm/drm_mode.h | 14 +++++++++++++ 5 files changed, 82 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 60b3a069f4dd..142fc52bc5b2 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -794,6 +794,10 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, drm_printf(p, "\tcurve_1d_type=%s\n", drm_get_colorop_curve_1d_type_name(state->curve_1d_type)); break; + case DRM_COLOROP_1D_LUT: + drm_printf(p, "\tsize=%d\n", colorop->size); + drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); + break; case DRM_COLOROP_CTM_3X4: drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0); break; diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 6f4345b9b2a3..b9045b17fa62 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -699,6 +699,9 @@ static int drm_atomic_color_set_data_property(struct drm_colorop *colorop, bool replaced = false; switch (colorop->type) { + case DRM_COLOROP_1D_LUT: + size = colorop->size * sizeof(struct drm_color_lut32); + break; case DRM_COLOROP_CTM_3X4: size = sizeof(struct drm_color_ctm_3x4); break; @@ -750,6 +753,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; else if (property == colorop->curve_1d_type_property) *val = state->curve_1d_type; + else if (property == colorop->size_property) + *val = colorop->size; else if (property == colorop->data_property) *val = (state->data) ? state->data->base.id : 0; else diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 46e4942b66e6..eaf457d0700c 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -64,6 +64,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, + { DRM_COLOROP_1D_LUT, "1D LUT" }, { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}, }; @@ -264,6 +265,47 @@ static int drm_colorop_create_data_prop(struct drm_device *dev, struct drm_color return 0; } +/** + * drm_plane_colorop_curve_1d_lut_init - Initialize a DRM_COLOROP_1D_LUT + * + * @dev: DRM device + * @colorop: The drm_colorop object to initialize + * @plane: The associated drm_plane + * @lut_size: LUT size supported by driver + * @return zero on success, -E value on failure + */ +int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, uint32_t lut_size) +{ + struct drm_property *prop; + int ret; + + ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_LUT); + if (ret) + return ret; + + /* initialize 
1D LUT only attribute */ + /* LUT size */ + prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_ATOMIC, + "SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + + colorop->size_property = prop; + drm_object_attach_property(&colorop->base, colorop->size_property, lut_size); + colorop->size = lut_size; + + /* data */ + ret = drm_colorop_create_data_prop(dev, colorop); + if (ret) + return ret; + + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_plane_colorop_curve_1d_lut_init); + int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane) { @@ -371,6 +413,7 @@ void drm_colorop_reset(struct drm_colorop *colorop) static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_CURVE] = "1D Curve", + [DRM_COLOROP_1D_LUT] = "1D LUT", [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", }; diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 067805276b15..529af9f8266d 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -257,6 +257,13 @@ struct drm_colorop { */ struct drm_property *bypass_property; + /** + * @size: + * + * Number of entries of the custom LUT. This should be read-only. + */ + uint32_t size; + /** * @curve_1d_type_property: * @@ -264,6 +271,13 @@ struct drm_colorop { */ struct drm_property *curve_1d_type_property; + /** + * @size_property: + * + * Size property for custom LUT from userspace. 
+ */ + struct drm_property *size_property; + /** * @data_property: * @@ -311,6 +325,8 @@ void drm_colorop_cleanup(struct drm_colorop *colorop); int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, u64 supported_tfs); +int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, uint32_t lut_size); int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 5e637ec7b64c..bec524e2fa32 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -902,6 +902,20 @@ enum drm_colorop_type { */ DRM_COLOROP_1D_CURVE, + /** + * @DRM_COLOROP_1D_LUT: + * + * enum string "1D LUT" + * + * A simple 1D LUT of uniformly spaced &drm_color_lut32 entries, + * packed into a blob via the DATA property. The driver's + * expected LUT size is advertised via the SIZE property. + * + * The DATA blob is an array of struct drm_color_lut32 with size + * of "size". + */ + DRM_COLOROP_1D_LUT, + /** * @DRM_COLOROP_CTM_3X4: * -- cgit v1.2.3 From 3410108037d5b01fb35d2e4e92c17c3abdf89186 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 14 Nov 2025 17:02:05 -0700 Subject: drm/colorop: Add multiplier type This introduces a new drm_colorop_type: DRM_COLOROP_MULTIPLIER. It's a simple multiplier to all pixel values. The value is specified via a S31.32 fixed point provided via the "MULTIPLIER" property. 
Reviewed-by: Simon Ser Signed-off-by: Alex Hung Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-41-alex.hung@amd.com --- drivers/gpu/drm/drm_atomic.c | 3 +++ drivers/gpu/drm/drm_atomic_uapi.c | 4 ++++ drivers/gpu/drm/drm_colorop.c | 33 +++++++++++++++++++++++++++++++++ include/drm/drm_colorop.h | 16 ++++++++++++++++ include/uapi/drm/drm_mode.h | 11 +++++++++++ 5 files changed, 67 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 142fc52bc5b2..45243d05a7fd 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -801,6 +801,9 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, case DRM_COLOROP_CTM_3X4: drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); break; + case DRM_COLOROP_MULTIPLIER: + drm_printf(p, "\tmultiplier=%llu\n", state->multiplier); + break; default: break; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index b9045b17fa62..874dcf8dd14f 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -728,6 +728,8 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, state->bypass = val; } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; + } else if (property == colorop->multiplier_property) { + state->multiplier = val; } else if (property == colorop->data_property) { return drm_atomic_color_set_data_property(colorop, state, property, val); @@ -753,6 +755,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->bypass; else if (property == colorop->curve_1d_type_property) *val = state->curve_1d_type; + else if (property == colorop->multiplier_property) + *val = state->multiplier; else if (property == colorop->size_property) *val = colorop->size; else if (property == 
colorop->data_property) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index eaf457d0700c..be99af98fd6c 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -66,6 +66,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_CURVE, "1D Curve" }, { DRM_COLOROP_1D_LUT, "1D LUT" }, { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}, + { DRM_COLOROP_MULTIPLIER, "Multiplier"}, }; static const char * const colorop_curve_1d_type_names[] = { @@ -325,6 +326,37 @@ int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *c } EXPORT_SYMBOL(drm_plane_colorop_ctm_3x4_init); +/** + * drm_plane_colorop_mult_init - Initialize a DRM_COLOROP_MULTIPLIER + * + * @dev: DRM device + * @colorop: The drm_colorop object to initialize + * @plane: The associated drm_plane + * @return zero on success, -E value on failure + */ +int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane) +{ + struct drm_property *prop; + int ret; + + ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_MULTIPLIER); + if (ret) + return ret; + + prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "MULTIPLIER", 0, U64_MAX); + if (!prop) + return -ENOMEM; + + colorop->multiplier_property = prop; + drm_object_attach_property(&colorop->base, colorop->multiplier_property, 0); + + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_plane_colorop_mult_init); + static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, struct drm_colorop_state *state) { @@ -415,6 +447,7 @@ static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_CURVE] = "1D Curve", [DRM_COLOROP_1D_LUT] = "1D LUT", [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", + [DRM_COLOROP_MULTIPLIER] = "Multiplier", }; const char *drm_get_colorop_type_name(enum drm_colorop_type type) diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 
529af9f8266d..a4f6a22fa1f9 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -144,6 +144,13 @@ struct drm_colorop_state { */ enum drm_colorop_curve_1d_type curve_1d_type; + /** + * @multiplier: + * + * Multiplier to 'gain' the plane. Format is S31.32 sign-magnitude. + */ + uint64_t multiplier; + /** * @data: * @@ -271,6 +278,13 @@ struct drm_colorop { */ struct drm_property *curve_1d_type_property; + /** + * @multiplier_property: + * + * Multiplier property for plane gain + */ + struct drm_property *multiplier_property; + /** * @size_property: * @@ -329,6 +343,8 @@ int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_color struct drm_plane *plane, uint32_t lut_size); int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane); +int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane); struct drm_colorop_state * drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index bec524e2fa32..cac25c0ca37b 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -931,6 +931,17 @@ enum drm_colorop_type { * | B | | 8 9 10 12 | | B | */ DRM_COLOROP_CTM_3X4, + + /** + * @DRM_COLOROP_MULTIPLIER: + * + * enum string "Multiplier" + * + * A simple multiplier, applied to all color values. The + * multiplier is specified as a S31.32 via the MULTIPLIER + * property. + */ + DRM_COLOROP_MULTIPLIER, }; /** -- cgit v1.2.3 From 7fa3ee8c0a79b7a8b5fae422ca29da2fde6821ba Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 14 Nov 2025 17:02:08 -0700 Subject: drm/colorop: Define LUT_1D interpolation We want to make sure userspace is aware of the 1D LUT interpolation. While linear interpolation is common it might not be supported on all HW. Give driver implementers a way to specify their interpolation. 
Reviewed-by: Simon Ser Signed-off-by: Alex Hung Signed-off-by: Harry Wentland Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-44-alex.hung@amd.com --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 6 ++-- drivers/gpu/drm/drm_atomic.c | 2 ++ drivers/gpu/drm/drm_atomic_uapi.c | 4 +++ drivers/gpu/drm/drm_colorop.c | 37 +++++++++++++++++++++- include/drm/drm_colorop.h | 21 +++++++++++- include/uapi/drm/drm_mode.h | 13 ++++++++ 6 files changed, 79 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index 33907cc8e1b3..e9363814d666 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -126,7 +126,8 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES); + ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES, + DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR); if (ret) goto cleanup; @@ -156,7 +157,8 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES); + ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES, + DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR); if (ret) goto cleanup; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 45243d05a7fd..e49d2442fa12 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -796,6 +796,8 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, break; case DRM_COLOROP_1D_LUT: drm_printf(p, "\tsize=%d\n", colorop->size); + drm_printf(p, 
"\tinterpolation=%s\n", + drm_get_colorop_lut1d_interpolation_name(colorop->lut1d_interpolation)); drm_printf(p, "\tdata blob id=%d\n", state->data ? state->data->base.id : 0); break; case DRM_COLOROP_CTM_3X4: diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 874dcf8dd14f..64e49338e284 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -726,6 +726,8 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, { if (property == colorop->bypass_property) { state->bypass = val; + } else if (property == colorop->lut1d_interpolation_property) { + colorop->lut1d_interpolation = val; } else if (property == colorop->curve_1d_type_property) { state->curve_1d_type = val; } else if (property == colorop->multiplier_property) { @@ -753,6 +755,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = colorop->type; else if (property == colorop->bypass_property) *val = state->bypass; + else if (property == colorop->lut1d_interpolation_property) + *val = colorop->lut1d_interpolation; else if (property == colorop->curve_1d_type_property) *val = state->curve_1d_type; else if (property == colorop->multiplier_property) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index be99af98fd6c..f9717d809829 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -78,6 +78,10 @@ static const char * const colorop_curve_1d_type_names[] = { [DRM_COLOROP_1D_CURVE_BT2020_OETF] = "BT.2020 OETF", }; +static const struct drm_prop_enum_list drm_colorop_lut1d_interpolation_list[] = { + { DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, "Linear" }, +}; + /* Init Helpers */ static int drm_plane_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, @@ -273,10 +277,12 @@ static int drm_colorop_create_data_prop(struct drm_device *dev, struct drm_color * @colorop: The drm_colorop object to initialize * @plane: The associated drm_plane * @lut_size: 
LUT size supported by driver + * @interpolation: 1D LUT interpolation type * @return zero on success, -E value on failure */ int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t lut_size) + struct drm_plane *plane, uint32_t lut_size, + enum drm_colorop_lut1d_interpolation_type interpolation) { struct drm_property *prop; int ret; @@ -296,6 +302,17 @@ int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_color drm_object_attach_property(&colorop->base, colorop->size_property, lut_size); colorop->size = lut_size; + /* interpolation */ + prop = drm_property_create_enum(dev, 0, "LUT1D_INTERPOLATION", + drm_colorop_lut1d_interpolation_list, + ARRAY_SIZE(drm_colorop_lut1d_interpolation_list)); + if (!prop) + return -ENOMEM; + + colorop->lut1d_interpolation_property = prop; + drm_object_attach_property(&colorop->base, prop, interpolation); + colorop->lut1d_interpolation = interpolation; + /* data */ ret = drm_colorop_create_data_prop(dev, colorop); if (ret) @@ -449,6 +466,9 @@ static const char * const colorop_type_name[] = { [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", [DRM_COLOROP_MULTIPLIER] = "Multiplier", }; +static const char * const colorop_lut1d_interpolation_name[] = { + [DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR] = "Linear", +}; const char *drm_get_colorop_type_name(enum drm_colorop_type type) { @@ -466,6 +486,21 @@ const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type ty return colorop_curve_1d_type_names[type]; } +/** + * drm_get_colorop_lut1d_interpolation_name: return a string for interpolation type + * @type: interpolation type to compute name of + * + * In contrast to the other drm_get_*_name functions this one here returns a + * const pointer and hence is threadsafe. 
+ */ +const char *drm_get_colorop_lut1d_interpolation_name(enum drm_colorop_lut1d_interpolation_type type) +{ + if (WARN_ON(type >= ARRAY_SIZE(colorop_lut1d_interpolation_name))) + return "unknown"; + + return colorop_lut1d_interpolation_name[type]; +} + /** * drm_colorop_set_next_property - sets the next pointer * @colorop: drm colorop diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index a4f6a22fa1f9..0e1a5e9d26f3 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -271,6 +271,21 @@ struct drm_colorop { */ uint32_t size; + /** + * @lut1d_interpolation: + * + * Read-only + * Interpolation for DRM_COLOROP_1D_LUT + */ + enum drm_colorop_lut1d_interpolation_type lut1d_interpolation; + + /** + * @lut1d_interpolation_property: + * + * Read-only property for DRM_COLOROP_1D_LUT interpolation + */ + struct drm_property *lut1d_interpolation_property; + /** * @curve_1d_type_property: * @@ -340,7 +355,8 @@ void drm_colorop_cleanup(struct drm_colorop *colorop); int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, u64 supported_tfs); int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t lut_size); + struct drm_plane *plane, uint32_t lut_size, + enum drm_colorop_lut1d_interpolation_type interpolation); int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane); int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, @@ -394,6 +410,9 @@ const char *drm_get_colorop_type_name(enum drm_colorop_type type); */ const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type type); +const char * +drm_get_colorop_lut1d_interpolation_name(enum drm_colorop_lut1d_interpolation_type type); + void drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next); #endif /* __DRM_COLOROP_H__ */ diff --git 
a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index cac25c0ca37b..4b38da880fc7 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -944,6 +944,19 @@ enum drm_colorop_type { DRM_COLOROP_MULTIPLIER, }; +/** + * enum drm_colorop_lut1d_interpolation_type - type of interpolation for 1D LUTs + */ +enum drm_colorop_lut1d_interpolation_type { + /** + * @DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR: + * + * Linear interpolation. Values between points of the LUT will be + * linearly interpolated. + */ + DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, +}; + /** * struct drm_plane_size_hint - Plane size hints * @width: The width of the plane in pixel -- cgit v1.2.3 From db971856bbe0263d0ba78d641ea66d4fcdfc8fc3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 14 Nov 2025 17:02:10 -0700 Subject: drm/colorop: Add 3D LUT support to color pipeline It is to be used to enable HDR by allowing userspace to create and pass 3D LUTs to the kernel and hardware. This adds a new drm_colorop_type: DRM_COLOROP_3D_LUT. 
Reviewed-by: Simon Ser Signed-off-by: Alex Hung Reviewed-by: Daniel Stone Reviewed-by: Melissa Wen Reviewed-by: Sebastian Wick Signed-off-by: Simon Ser Link: https://patch.msgid.link/20251115000237.3561250-46-alex.hung@amd.com --- drivers/gpu/drm/drm_atomic.c | 6 ++++ drivers/gpu/drm/drm_atomic_uapi.c | 8 +++++ drivers/gpu/drm/drm_colorop.c | 72 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_colorop.h | 23 +++++++++++++ include/uapi/drm/drm_mode.h | 34 ++++++++++++++++++ 5 files changed, 143 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 436d2a948e41..67e095e398a3 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -807,6 +807,12 @@ static void drm_atomic_colorop_print_state(struct drm_printer *p, case DRM_COLOROP_MULTIPLIER: drm_printf(p, "\tmultiplier=%llu\n", state->multiplier); break; + case DRM_COLOROP_3D_LUT: + drm_printf(p, "\tsize=%d\n", colorop->size); + drm_printf(p, "\tinterpolation=%s\n", + drm_get_colorop_lut3d_interpolation_name(colorop->lut3d_interpolation)); + drm_printf(p, "\tdata blob id=%d\n", state->data ? 
state->data->base.id : 0); + break; default: break; } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 64e49338e284..7320db4b8489 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -705,6 +705,10 @@ static int drm_atomic_color_set_data_property(struct drm_colorop *colorop, case DRM_COLOROP_CTM_3X4: size = sizeof(struct drm_color_ctm_3x4); break; + case DRM_COLOROP_3D_LUT: + size = colorop->size * colorop->size * colorop->size * + sizeof(struct drm_color_lut32); + break; default: /* should never get here */ return -EINVAL; @@ -732,6 +736,8 @@ static int drm_atomic_colorop_set_property(struct drm_colorop *colorop, state->curve_1d_type = val; } else if (property == colorop->multiplier_property) { state->multiplier = val; + } else if (property == colorop->lut3d_interpolation_property) { + colorop->lut3d_interpolation = val; } else if (property == colorop->data_property) { return drm_atomic_color_set_data_property(colorop, state, property, val); @@ -763,6 +769,8 @@ drm_atomic_colorop_get_property(struct drm_colorop *colorop, *val = state->multiplier; else if (property == colorop->size_property) *val = colorop->size; + else if (property == colorop->lut3d_interpolation_property) + *val = colorop->lut3d_interpolation; else if (property == colorop->data_property) *val = (state->data) ? 
state->data->base.id : 0; else diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index fd666394f31f..272a73c28c0e 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -67,6 +67,7 @@ static const struct drm_prop_enum_list drm_colorop_type_enum_list[] = { { DRM_COLOROP_1D_LUT, "1D LUT" }, { DRM_COLOROP_CTM_3X4, "3x4 Matrix"}, { DRM_COLOROP_MULTIPLIER, "Multiplier"}, + { DRM_COLOROP_3D_LUT, "3D LUT"}, }; static const char * const colorop_curve_1d_type_names[] = { @@ -82,6 +83,11 @@ static const struct drm_prop_enum_list drm_colorop_lut1d_interpolation_list[] = { DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, "Linear" }, }; + +static const struct drm_prop_enum_list drm_colorop_lut3d_interpolation_list[] = { + { DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL, "Tetrahedral" }, +}; + /* Init Helpers */ static int drm_plane_colorop_init(struct drm_device *dev, struct drm_colorop *colorop, @@ -381,6 +387,51 @@ int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colo } EXPORT_SYMBOL(drm_plane_colorop_mult_init); +int drm_plane_colorop_3dlut_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, + uint32_t lut_size, + enum drm_colorop_lut3d_interpolation_type interpolation, + uint32_t flags) +{ + struct drm_property *prop; + int ret; + + ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_3D_LUT, flags); + if (ret) + return ret; + + /* LUT size */ + prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_ATOMIC, + "SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + + colorop->size_property = prop; + drm_object_attach_property(&colorop->base, colorop->size_property, lut_size); + colorop->size = lut_size; + + /* interpolation */ + prop = drm_property_create_enum(dev, 0, "LUT3D_INTERPOLATION", + drm_colorop_lut3d_interpolation_list, + ARRAY_SIZE(drm_colorop_lut3d_interpolation_list)); + if (!prop) + return -ENOMEM; + + 
colorop->lut3d_interpolation_property = prop; + drm_object_attach_property(&colorop->base, prop, interpolation); + colorop->lut3d_interpolation = interpolation; + + /* data */ + ret = drm_colorop_create_data_prop(dev, colorop); + if (ret) + return ret; + + drm_colorop_reset(colorop); + + return 0; +} +EXPORT_SYMBOL(drm_plane_colorop_3dlut_init); + static void __drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop, struct drm_colorop_state *state) { @@ -472,7 +523,13 @@ static const char * const colorop_type_name[] = { [DRM_COLOROP_1D_LUT] = "1D LUT", [DRM_COLOROP_CTM_3X4] = "3x4 Matrix", [DRM_COLOROP_MULTIPLIER] = "Multiplier", + [DRM_COLOROP_3D_LUT] = "3D LUT", +}; + +static const char * const colorop_lu3d_interpolation_name[] = { + [DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL] = "Tetrahedral", }; + static const char * const colorop_lut1d_interpolation_name[] = { [DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR] = "Linear", }; @@ -508,6 +565,21 @@ const char *drm_get_colorop_lut1d_interpolation_name(enum drm_colorop_lut1d_inte return colorop_lut1d_interpolation_name[type]; } +/** + * drm_get_colorop_lut3d_interpolation_name - return a string for interpolation type + * @type: interpolation type to compute name of + * + * In contrast to the other drm_get_*_name functions this one here returns a + * const pointer and hence is threadsafe. 
+ */ +const char *drm_get_colorop_lut3d_interpolation_name(enum drm_colorop_lut3d_interpolation_type type) +{ + if (WARN_ON(type >= ARRAY_SIZE(colorop_lu3d_interpolation_name))) + return "unknown"; + + return colorop_lu3d_interpolation_name[type]; +} + /** * drm_colorop_set_next_property - sets the next pointer * @colorop: drm colorop diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 828888861ad9..9773e30e15ae 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -283,6 +283,14 @@ struct drm_colorop { */ enum drm_colorop_lut1d_interpolation_type lut1d_interpolation; + /** + * @lut3d_interpolation: + * + * Read-only + * Interpolation for DRM_COLOROP_3D_LUT + */ + enum drm_colorop_lut3d_interpolation_type lut3d_interpolation; + /** * @lut1d_interpolation_property: * @@ -311,6 +319,13 @@ struct drm_colorop { */ struct drm_property *size_property; + /** + * @lut3d_interpolation_property: + * + * Read-only property for DRM_COLOROP_3D_LUT interpolation + */ + struct drm_property *lut3d_interpolation_property; + /** * @data_property: * @@ -366,6 +381,11 @@ int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *c struct drm_plane *plane, uint32_t flags); int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, uint32_t flags); +int drm_plane_colorop_3dlut_init(struct drm_device *dev, struct drm_colorop *colorop, + struct drm_plane *plane, + uint32_t lut_size, + enum drm_colorop_lut3d_interpolation_type interpolation, + uint32_t flags); struct drm_colorop_state * drm_atomic_helper_colorop_duplicate_state(struct drm_colorop *colorop); @@ -418,6 +438,9 @@ const char *drm_get_colorop_curve_1d_type_name(enum drm_colorop_curve_1d_type ty const char * drm_get_colorop_lut1d_interpolation_name(enum drm_colorop_lut1d_interpolation_type type); +const char * +drm_get_colorop_lut3d_interpolation_name(enum drm_colorop_lut3d_interpolation_type type); + void 
drm_colorop_set_next_property(struct drm_colorop *colorop, struct drm_colorop *next); #endif /* __DRM_COLOROP_H__ */ diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 4b38da880fc7..cbbbfc1dfe2b 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -942,6 +942,40 @@ enum drm_colorop_type { * property. */ DRM_COLOROP_MULTIPLIER, + + /** + * @DRM_COLOROP_3D_LUT: + * + * enum string "3D LUT" + * + * A 3D LUT of &drm_color_lut32 entries, + * packed into a blob via the DATA property. The driver's expected + * LUT size is advertised via the SIZE property, i.e., a 3D LUT with + * 17x17x17 entries will have SIZE set to 17. + * + * The DATA blob is a 3D array of struct drm_color_lut32 with dimension + * length of "size". + * The LUT elements are traversed like so: + * + * for B in range 0..n + * for G in range 0..n + * for R in range 0..n + * index = R + n * (G + n * B) + * color = lut3d[index] + */ + DRM_COLOROP_3D_LUT, +}; + +/** + * enum drm_colorop_lut3d_interpolation_type - type of 3DLUT interpolation + */ +enum drm_colorop_lut3d_interpolation_type { + /** + * @DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL: + * + * Tetrahedral 3DLUT interpolation + */ + DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL, }; /** -- cgit v1.2.3