From ef246da8e63c486780dca4d9b4d79589cbebf5e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 24 Jan 2026 21:14:13 +0200 Subject: dma-buf: Rename .move_notify() callback to a clearer identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the .move_notify() callback to .invalidate_mappings() to make its purpose explicit and highlight that it is responsible for invalidating existing mappings. Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-1-f98fca917e96@nvidia.com Signed-off-by: Christian König --- include/linux/dma-buf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 91f4939db89b..d9ee4499b37d 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -407,7 +407,7 @@ struct dma_buf { * through the device. * * - Dynamic importers should set fences for any access that they can't - * disable immediately from their &dma_buf_attach_ops.move_notify + * disable immediately from their &dma_buf_attach_ops.invalidate_mappings * callback. * * IMPORTANT: @@ -446,7 +446,7 @@ struct dma_buf_attach_ops { bool allow_peer2peer; /** - * @move_notify: [optional] notification that the DMA-buf is moving + * @invalidate_mappings: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. @@ -463,7 +463,7 @@ struct dma_buf_attach_ops { * New mappings can be created after this callback returns, and will * point to the new location of the DMA-buf. 
*/ - void (*move_notify)(struct dma_buf_attachment *attach); + void (*invalidate_mappings)(struct dma_buf_attachment *attach); }; /** -- cgit v1.2.3 From 95308225e5baeaae1e313816059c59a0036ab6b2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 24 Jan 2026 21:14:14 +0200 Subject: dma-buf: Rename dma_buf_move_notify() to dma_buf_invalidate_mappings() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Along with renaming the .move_notify() callback, rename the corresponding dma-buf core function. This makes the expected behavior clear to exporters calling this function. Signed-off-by: Leon Romanovsky Reviewed-by: Christian König Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-2-f98fca917e96@nvidia.com Signed-off-by: Christian König --- drivers/dma-buf/dma-buf.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/iommu/iommufd/selftest.c | 2 +- drivers/vfio/pci/vfio_pci_dmabuf.c | 4 ++-- include/linux/dma-buf.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index cc9b88214d97..1c257607a623 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -981,7 +981,7 @@ dma_buf_pin_on_map(struct dma_buf_attachment *attach) * 3. Exporters must hold the dma-buf reservation lock when calling these * functions: * - * - dma_buf_move_notify() + * - dma_buf_invalidate_mappings() */ /** @@ -1323,14 +1323,14 @@ void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, "DMA_BUF"); /** - * dma_buf_move_notify - notify attachments that DMA-buf is moving + * dma_buf_invalidate_mappings - notify attachments that DMA-buf is moving * * @dmabuf: [in] buffer which is moving * * Informs all attachments that they need to destroy and recreate all their * mappings. 
*/ -void dma_buf_move_notify(struct dma_buf *dmabuf) +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf) { struct dma_buf_attachment *attach; @@ -1340,7 +1340,7 @@ void dma_buf_move_notify(struct dma_buf *dmabuf) if (attach->importer_ops) attach->importer_ops->invalidate_mappings(attach); } -EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, "DMA_BUF"); +EXPORT_SYMBOL_NS_GPL(dma_buf_invalidate_mappings, "DMA_BUF"); /** * DOC: cpu access diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e08f58de4b17..f73dc99d1887 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1270,7 +1270,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) && old_mem && old_mem->mem_type != TTM_PL_SYSTEM) - dma_buf_move_notify(abo->tbo.base.dma_buf); + dma_buf_invalidate_mappings(abo->tbo.base.dma_buf); /* move_notify is called before move happens */ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b0bd31d14bb9..94712b05edff 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -819,7 +819,7 @@ static int xe_bo_move_notify(struct xe_bo *bo, /* Don't call move_notify() for imported dma-bufs. 
*/ if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) - dma_buf_move_notify(ttm_bo->base.dma_buf); + dma_buf_invalidate_mappings(ttm_bo->base.dma_buf); /* * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual), diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index c4322fd26f93..fd47953db4a3 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -2073,7 +2073,7 @@ static int iommufd_test_dmabuf_revoke(struct iommufd_ucmd *ucmd, int fd, priv = dmabuf->priv; dma_resv_lock(dmabuf->resv, NULL); priv->revoked = revoked; - dma_buf_move_notify(dmabuf); + dma_buf_invalidate_mappings(dmabuf); dma_resv_unlock(dmabuf->resv); err_put: diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index d4d0f7d08c53..362e3d149817 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -320,7 +320,7 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked) if (priv->revoked != revoked) { dma_resv_lock(priv->dmabuf->resv, NULL); priv->revoked = revoked; - dma_buf_move_notify(priv->dmabuf); + dma_buf_invalidate_mappings(priv->dmabuf); dma_resv_unlock(priv->dmabuf->resv); } fput(priv->dmabuf->file); @@ -341,7 +341,7 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev) list_del_init(&priv->dmabufs_elm); priv->vdev = NULL; priv->revoked = true; - dma_buf_move_notify(priv->dmabuf); + dma_buf_invalidate_mappings(priv->dmabuf); dma_resv_unlock(priv->dmabuf->resv); vfio_device_put_registration(&vdev->vdev); fput(priv->dmabuf->file); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index d9ee4499b37d..d0470af8887e 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -588,7 +588,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); -void 
dma_buf_move_notify(struct dma_buf *dma_buf); +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, -- cgit v1.2.3 From 42dab3138176a944b09996441d837986f9ef13f8 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Tue, 6 Jan 2026 11:00:16 -0600 Subject: drm/connector: Add a new 'panel_type' property If the driver can make an assertion whether a connected panel is an OLED panel or not then it can attach a property to the connector that userspace can use as a hint for color schemes. Reviewed-by: Leo Li Link: https://patch.msgid.link/20260106170017.68158-2-superm1@kernel.org Signed-off-by: Mario Limonciello (AMD) --- drivers/gpu/drm/drm_connector.c | 33 +++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 1 + include/drm/drm_mode_config.h | 4 ++++ include/uapi/drm/drm_mode.h | 4 ++++ 4 files changed, 42 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 4f5b27fab475..aec05adbc889 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1173,6 +1173,11 @@ static const struct drm_prop_enum_list drm_link_status_enum_list[] = { { DRM_MODE_LINK_STATUS_BAD, "Bad" }, }; +static const struct drm_prop_enum_list drm_panel_type_enum_list[] = { + { DRM_MODE_PANEL_TYPE_UNKNOWN, "unknown" }, + { DRM_MODE_PANEL_TYPE_OLED, "OLED" }, +}; + /** * drm_display_info_set_bus_formats - set the supported bus formats * @info: display info to store bus formats in @@ -1501,6 +1506,9 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * Summarizing: Only set "DPMS" when the connector is known to be enabled, * assume that a successful SETCONFIG call also sets "DPMS" to on, and * never read back the value of "DPMS" because it can be incorrect. + * panel_type: + * Immutable enum property to indicate the type of connected panel. 
+ * Possible values are "unknown" (default) and "OLED". * PATH: * Connector path property to identify how this sink is physically * connected. Used by DP MST. This should be set by calling @@ -1851,6 +1859,13 @@ int drm_connector_create_standard_properties(struct drm_device *dev) return -ENOMEM; dev->mode_config.link_status_property = prop; + prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "panel_type", + drm_panel_type_enum_list, + ARRAY_SIZE(drm_panel_type_enum_list)); + if (!prop) + return -ENOMEM; + dev->mode_config.panel_type_property = prop; + prop = drm_property_create_bool(dev, DRM_MODE_PROP_IMMUTABLE, "non-desktop"); if (!prop) return -ENOMEM; @@ -3626,3 +3641,21 @@ struct drm_tile_group *drm_mode_create_tile_group(struct drm_device *dev, return tg; } EXPORT_SYMBOL(drm_mode_create_tile_group); + +/** + * drm_connector_attach_panel_type_property - attaches panel type property + * @connector: connector to attach the property on. + * + * This is used to add support for panel type detection. 
+ */ +void drm_connector_attach_panel_type_property(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_property *prop = dev->mode_config.panel_type_property; + + if (!prop) + return; + + drm_object_attach_property(&connector->base, prop, DRM_MODE_PANEL_TYPE_UNKNOWN); +} +EXPORT_SYMBOL(drm_connector_attach_panel_type_property); diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 7eaec37ae1c7..c18be8c19de0 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2493,6 +2493,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask); int drm_connector_attach_vrr_capable_property( struct drm_connector *connector); +void drm_connector_attach_panel_type_property(struct drm_connector *connector); int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector); int drm_connector_attach_colorspace_property(struct drm_connector *connector); int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 895fb820dba0..5e1dd0cfccde 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -600,6 +600,10 @@ struct drm_mode_config { * multiple CRTCs. 
*/ struct drm_property *tile_property; + /** + * @panel_type_property: Default connector property for panel type + */ + struct drm_property *panel_type_property; /** * @link_status_property: Default connector property for link status * of a connector diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index cbbbfc1dfe2b..3693d82b5279 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -166,6 +166,10 @@ extern "C" { #define DRM_MODE_LINK_STATUS_GOOD 0 #define DRM_MODE_LINK_STATUS_BAD 1 +/* Panel type property */ +#define DRM_MODE_PANEL_TYPE_UNKNOWN 0 +#define DRM_MODE_PANEL_TYPE_OLED 1 + /* * DRM_MODE_ROTATE_ * -- cgit v1.2.3 From 3aecd55af5b83d16d84e3c333d4163999ee8ff51 Mon Sep 17 00:00:00 2001 From: Caterina Shablia Date: Wed, 28 Jan 2026 18:40:57 +0000 Subject: drm: add ARM interleaved 64k modifier This modifier is primarily intended to be used by panvk to implement sparse partially-resident images with better map and unmap performance, and no worse access performance, compared to implementing them in terms of U-interleaved. With this modifier, the plane is divided into 64k byte 1:1 or 2:1 -sided tiles. The 64k tiles are laid out linearly. Each 64k tile is divided into blocks of 16x16 texel blocks each, which themselves are laid out linearly within a 64k tile. Then within each such 16x16 block, texel blocks are laid out according to U order, similar to 16X16_BLOCK_U_INTERLEAVED. Unlike 16X16_BLOCK_U_INTERLEAVED, the layout does not depend on whether a format is compressed or not. The hardware features corresponding to this modifier are available starting with v10 (second gen Valhall.) The corresponding panvk MR can be found at: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38986 Previous version: https://lists.freedesktop.org/archives/dri-devel/2026-January/547072.html No changes since v2 Changes since v1: * Rewrite the description of the modifier to be hopefully unambiguous. 
Signed-off-by: Caterina Shablia Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Link: https://patch.msgid.link/20260128184058.807213-1-caterina.shablia@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/drm_fourcc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index e527b24bd824..452f901513ad 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1422,6 +1422,22 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED \ DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 1ULL) +/* + * ARM 64k interleaved modifier + * + * This is used by ARM Mali v10+ GPUs. With this modifier, the plane is divided + * into 64k byte 1:1 or 2:1 -sided tiles. The 64k tiles are laid out linearly. + * Each 64k tile is divided into blocks of 16x16 texel blocks, which are + * themselves laid out linearly within a 64k tile. Then within each 16x16 + * block, texel blocks are laid out according to U order, similar to + * 16X16_BLOCK_U_INTERLEAVED. + * + * Note that unlike 16X16_BLOCK_U_INTERLEAVED, the layout does not change + * depending on whether a format is compressed or not. + */ +#define DRM_FORMAT_MOD_ARM_INTERLEAVED_64K \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 2ULL) + /* * Allwinner tiled modifier * -- cgit v1.2.3 From 2bcbc706dfa02ae50118173a6f6d8a12e735480c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 19 Dec 2025 11:41:54 +0100 Subject: dma-buf: add dma_fence_was_initialized function v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers use fence->ops to test if a fence was initialized or not. The problem is that this utilizes internal behavior of the dma_fence implementation. So better abstract that into a function.
v2: use a flag instead of testing fence->ops, rename the function, move to the beginning of the patch set. Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260120105655.7134-2-christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++------ drivers/gpu/drm/qxl/qxl_release.c | 2 +- include/linux/dma-fence.h | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 21c5c30b4f34..c9a036b0d592 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -1054,7 +1054,7 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, fence->lock = lock; fence->context = context; fence->seqno = seqno; - fence->flags = flags; + fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT); fence->error = 0; trace_dma_fence_init(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index aaf5477fcd7a..f05683d59f8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -282,9 +282,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) unsigned i; /* Check if any fences were initialized */ - if (job->base.s_fence && job->base.s_fence->finished.ops) + if (job->base.s_fence && + dma_fence_was_initialized(&job->base.s_fence->finished)) f = &job->base.s_fence->finished; - else if (job->hw_fence && job->hw_fence->base.ops) + else if (dma_fence_was_initialized(&job->hw_fence->base)) f = &job->hw_fence->base; else f = NULL; @@ -301,11 +302,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); - if (job->hw_fence->base.ops) + if (dma_fence_was_initialized(&job->hw_fence->base)) dma_fence_put(&job->hw_fence->base); else kfree(job->hw_fence); - if (job->hw_vm_fence->base.ops) + if 
(dma_fence_was_initialized(&job->hw_vm_fence->base)) dma_fence_put(&job->hw_vm_fence->base); else kfree(job->hw_vm_fence); @@ -339,11 +340,11 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (job->hw_fence->base.ops) + if (dma_fence_was_initialized(&job->hw_fence->base)) dma_fence_put(&job->hw_fence->base); else kfree(job->hw_fence); - if (job->hw_vm_fence->base.ops) + if (dma_fence_was_initialized(&job->hw_vm_fence->base)) dma_fence_put(&job->hw_vm_fence->base); else kfree(job->hw_vm_fence); diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 7b3c9a6016db..06b0b2aa7953 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -146,7 +146,7 @@ qxl_release_free(struct qxl_device *qdev, idr_remove(&qdev->release_idr, release->id); spin_unlock(&qdev->release_idr_lock); - if (release->base.ops) { + if (dma_fence_was_initialized(&release->base)) { WARN_ON(list_empty(&release->bos)); qxl_release_free_list(release); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index d4c92fd35092..9c4d25289239 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -48,6 +48,7 @@ struct seq_file; * atomic ops (bit_*), so taking the spinlock will not be needed most * of the time. 
* + * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called @@ -98,6 +99,7 @@ struct dma_fence { }; enum dma_fence_flag_bits { + DMA_FENCE_FLAG_INITIALIZED_BIT, DMA_FENCE_FLAG_SEQNO64_BIT, DMA_FENCE_FLAG_SIGNALED_BIT, DMA_FENCE_FLAG_TIMESTAMP_BIT, @@ -263,6 +265,19 @@ void dma_fence_release(struct kref *kref); void dma_fence_free(struct dma_fence *fence); void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq); +/** + * dma_fence_was_initialized - test if fence was initialized + * @fence: fence to test + * + * Return: True if fence was ever initialized, false otherwise. Works correctly + * only when memory backing the fence structure is zero initialized on + * allocation. + */ +static inline bool dma_fence_was_initialized(struct dma_fence *fence) +{ + return fence && test_bit(DMA_FENCE_FLAG_INITIALIZED_BIT, &fence->flags); +} + /** * dma_fence_put - decreases refcount of the fence * @fence: fence to reduce refcount of -- cgit v1.2.3 From 4a9671a03f2be13acde0cb15c5208767a9cc56e4 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 6 Feb 2026 08:52:38 +1000 Subject: gpu: Move DRM buddy allocator one level up (part one) Move the DRM buddy allocator one level up so that it can be used by GPU drivers (for example, nova-core) that have use cases other than DRM (such as VFIO vGPU support). Modify the API, structures and Kconfigs to use "gpu_buddy" terminology. Adapt the drivers and tests to use the new API. The commit cannot be split due to bisectability, however no functional change is intended. Verified by running K-UNIT tests and build tested various configurations. Signed-off-by: Joel Fernandes Reviewed-by: Dave Airlie [airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done elsewhere and only the buddy tests seem to be using it]. Signed-off-by: Dave Airlie --- Documentation/gpu/drm-mm.rst | 6 +- drivers/gpu/Makefile | 2 +- drivers/gpu/buddy.c | 1336 +++++++++++++++++++++++++ drivers/gpu/drm/Kconfig | 4 - drivers/gpu/drm/Kconfig.debug | 1 - drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 2 +- drivers/gpu/drm/drm_buddy.c | 1336 ------------------------- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.c | 2 +- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 2 +- drivers/gpu/drm/lib/drm_random.c | 44 - drivers/gpu/drm/lib/drm_random.h | 28 - drivers/gpu/drm/tests/Makefile | 1 - drivers/gpu/drm/tests/drm_buddy_test.c | 928 ----------------- drivers/gpu/drm/tests/drm_exec_test.c | 2 - drivers/gpu/drm/tests/drm_mm_test.c | 2 - drivers/gpu/drm/ttm/tests/ttm_mock_manager.h | 2 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 +- drivers/gpu/tests/Makefile | 4 + drivers/gpu/tests/gpu_buddy_test.c | 928 +++++++++++++++++ drivers/gpu/tests/gpu_random.c | 44 + drivers/gpu/tests/gpu_random.h | 28 + include/drm/drm_buddy.h | 171 ---- include/linux/gpu_buddy.h | 171 ++++ 25 files changed, 2522 insertions(+), 2529 deletions(-) create mode 100644 drivers/gpu/buddy.c delete mode 100644 drivers/gpu/drm/drm_buddy.c delete mode 100644 drivers/gpu/drm/lib/drm_random.c delete mode 100644 drivers/gpu/drm/lib/drm_random.h delete mode 100644 drivers/gpu/drm/tests/drm_buddy_test.c create mode 100644 drivers/gpu/tests/Makefile create mode 100644 drivers/gpu/tests/gpu_buddy_test.c create mode 100644 drivers/gpu/tests/gpu_random.c create mode 100644 drivers/gpu/tests/gpu_random.h delete mode 100644 include/drm/drm_buddy.h create mode 100644 include/linux/gpu_buddy.h (limited to 'include') diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index f22433470c76..ceee0e663237 100644 --- 
a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -526,10 +526,10 @@ DRM GPUVM Function References DRM Buddy Allocator =================== -DRM Buddy Function References ------------------------------ +Buddy Allocator Function References (GPU buddy) +----------------------------------------------- -.. kernel-doc:: drivers/gpu/drm/drm_buddy.c +.. kernel-doc:: drivers/gpu/buddy.c :export: DRM Cache Handling and Fast WC memcpy() diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 36a54d456630..c5292ee2c852 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -2,7 +2,7 @@ # drm/tegra depends on host1x, so if both drivers are built-in care must be # taken to initialize them in the correct order. Link order is the only way # to ensure this currently. -obj-y += host1x/ drm/ vga/ +obj-y += host1x/ drm/ vga/ tests/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ obj-$(CONFIG_NOVA_CORE) += nova-core/ diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c new file mode 100644 index 000000000000..4cc63d961d26 --- /dev/null +++ b/drivers/gpu/buddy.c @@ -0,0 +1,1336 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include +#include +#include +#include + +#include +#include + +enum drm_buddy_free_tree { + DRM_BUDDY_CLEAR_TREE = 0, + DRM_BUDDY_DIRTY_TREE, + DRM_BUDDY_MAX_FREE_TREES, +}; + +static struct kmem_cache *slab_blocks; + +#define for_each_free_tree(tree) \ + for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++) + +static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm, + struct drm_buddy_block *parent, + unsigned int order, + u64 offset) +{ + struct drm_buddy_block *block; + + BUG_ON(order > DRM_BUDDY_MAX_ORDER); + + block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); + if (!block) + return NULL; + + block->header = offset; + block->header |= order; + block->parent = parent; + + RB_CLEAR_NODE(&block->rb); + + BUG_ON(block->header & 
DRM_BUDDY_HEADER_UNUSED); + return block; +} + +static void drm_block_free(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + kmem_cache_free(slab_blocks, block); +} + +static enum drm_buddy_free_tree +get_block_tree(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block) ? + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; +} + +static struct drm_buddy_block * +rbtree_get_free_block(const struct rb_node *node) +{ + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL; +} + +static struct drm_buddy_block * +rbtree_last_free_block(struct rb_root *root) +{ + return rbtree_get_free_block(rb_last(root)); +} + +static bool rbtree_is_empty(struct rb_root *root) +{ + return RB_EMPTY_ROOT(root); +} + +static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block, + const struct drm_buddy_block *node) +{ + return drm_buddy_block_offset(block) < drm_buddy_block_offset(node); +} + +static bool rbtree_block_offset_less(struct rb_node *block, + const struct rb_node *node) +{ + return drm_buddy_block_offset_less(rbtree_get_free_block(block), + rbtree_get_free_block(node)); +} + +static void rbtree_insert(struct drm_buddy *mm, + struct drm_buddy_block *block, + enum drm_buddy_free_tree tree) +{ + rb_add(&block->rb, + &mm->free_trees[tree][drm_buddy_block_order(block)], + rbtree_block_offset_less); +} + +static void rbtree_remove(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + unsigned int order = drm_buddy_block_order(block); + enum drm_buddy_free_tree tree; + struct rb_root *root; + + tree = get_block_tree(block); + root = &mm->free_trees[tree][order]; + + rb_erase(&block->rb, root); + RB_CLEAR_NODE(&block->rb); +} + +static void clear_reset(struct drm_buddy_block *block) +{ + block->header &= ~DRM_BUDDY_HEADER_CLEAR; +} + +static void mark_cleared(struct drm_buddy_block *block) +{ + block->header |= DRM_BUDDY_HEADER_CLEAR; +} + +static void mark_allocated(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + 
block->header &= ~DRM_BUDDY_HEADER_STATE; + block->header |= DRM_BUDDY_ALLOCATED; + + rbtree_remove(mm, block); +} + +static void mark_free(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + enum drm_buddy_free_tree tree; + + block->header &= ~DRM_BUDDY_HEADER_STATE; + block->header |= DRM_BUDDY_FREE; + + tree = get_block_tree(block); + rbtree_insert(mm, block, tree); +} + +static void mark_split(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + block->header &= ~DRM_BUDDY_HEADER_STATE; + block->header |= DRM_BUDDY_SPLIT; + + rbtree_remove(mm, block); +} + +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +static struct drm_buddy_block * +__get_buddy(struct drm_buddy_block *block) +{ + struct drm_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static unsigned int __drm_buddy_free(struct drm_buddy *mm, + struct drm_buddy_block *block, + bool force_merge) +{ + struct drm_buddy_block *parent; + unsigned int order; + + while ((parent = block->parent)) { + struct drm_buddy_block *buddy; + + buddy = __get_buddy(block); + + if (!drm_buddy_block_is_free(buddy)) + break; + + if (!force_merge) { + /* + * Check the block and its buddy clear state and exit + * the loop if they both have the dissimilar state. 
+ */ + if (drm_buddy_block_is_clear(block) != + drm_buddy_block_is_clear(buddy)) + break; + + if (drm_buddy_block_is_clear(block)) + mark_cleared(parent); + } + + rbtree_remove(mm, buddy); + if (force_merge && drm_buddy_block_is_clear(buddy)) + mm->clear_avail -= drm_buddy_block_size(mm, buddy); + + drm_block_free(mm, block); + drm_block_free(mm, buddy); + + block = parent; + } + + order = drm_buddy_block_order(block); + mark_free(mm, block); + + return order; +} + +static int __force_merge(struct drm_buddy *mm, + u64 start, + u64 end, + unsigned int min_order) +{ + unsigned int tree, order; + int i; + + if (!min_order) + return -ENOMEM; + + if (min_order > mm->max_order) + return -EINVAL; + + for_each_free_tree(tree) { + for (i = min_order - 1; i >= 0; i--) { + struct rb_node *iter = rb_last(&mm->free_trees[tree][i]); + + while (iter) { + struct drm_buddy_block *block, *buddy; + u64 block_start, block_end; + + block = rbtree_get_free_block(iter); + iter = rb_prev(iter); + + if (!block || !block->parent) + continue; + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block) - 1; + + if (!contains(start, end, block_start, block_end)) + continue; + + buddy = __get_buddy(block); + if (!drm_buddy_block_is_free(buddy)) + continue; + + WARN_ON(drm_buddy_block_is_clear(block) == + drm_buddy_block_is_clear(buddy)); + + /* + * Advance to the next node when the current node is the buddy, + * as freeing the block will also remove its buddy from the tree. 
+ */ + if (iter == &buddy->rb) + iter = rb_prev(iter); + + rbtree_remove(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + + order = __drm_buddy_free(mm, block, true); + if (order >= min_order) + return 0; + } + } + } + + return -ENOMEM; +} + +/** + * drm_buddy_init - init memory manager + * + * @mm: DRM buddy manager to initialize + * @size: size in bytes to manage + * @chunk_size: minimum page size in bytes for our allocations + * + * Initializes the memory manager and its resources. + * + * Returns: + * 0 on success, error code on failure. + */ +int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) +{ + unsigned int i, j, root_count = 0; + u64 offset = 0; + + if (size < chunk_size) + return -EINVAL; + + if (chunk_size < SZ_4K) + return -EINVAL; + + if (!is_power_of_2(chunk_size)) + return -EINVAL; + + size = round_down(size, chunk_size); + + mm->size = size; + mm->avail = size; + mm->clear_avail = 0; + mm->chunk_size = chunk_size; + mm->max_order = ilog2(size) - ilog2(chunk_size); + + BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER); + + mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES, + sizeof(*mm->free_trees), + GFP_KERNEL); + if (!mm->free_trees) + return -ENOMEM; + + for_each_free_tree(i) { + mm->free_trees[i] = kmalloc_array(mm->max_order + 1, + sizeof(struct rb_root), + GFP_KERNEL); + if (!mm->free_trees[i]) + goto out_free_tree; + + for (j = 0; j <= mm->max_order; ++j) + mm->free_trees[i][j] = RB_ROOT; + } + + mm->n_roots = hweight64(size); + + mm->roots = kmalloc_array(mm->n_roots, + sizeof(struct drm_buddy_block *), + GFP_KERNEL); + if (!mm->roots) + goto out_free_tree; + + /* + * Split into power-of-two blocks, in case we are given a size that is + * not itself a power-of-two. 
+ */ + do { + struct drm_buddy_block *root; + unsigned int order; + u64 root_size; + + order = ilog2(size) - ilog2(chunk_size); + root_size = chunk_size << order; + + root = drm_block_alloc(mm, NULL, order, offset); + if (!root) + goto out_free_roots; + + mark_free(mm, root); + + BUG_ON(root_count > mm->max_order); + BUG_ON(drm_buddy_block_size(mm, root) < chunk_size); + + mm->roots[root_count] = root; + + offset += root_size; + size -= root_size; + root_count++; + } while (size); + + return 0; + +out_free_roots: + while (root_count--) + drm_block_free(mm, mm->roots[root_count]); + kfree(mm->roots); +out_free_tree: + while (i--) + kfree(mm->free_trees[i]); + kfree(mm->free_trees); + return -ENOMEM; +} +EXPORT_SYMBOL(drm_buddy_init); + +/** + * drm_buddy_fini - tear down the memory manager + * + * @mm: DRM buddy manager to free + * + * Cleanup memory manager resources and the freetree + */ +void drm_buddy_fini(struct drm_buddy *mm) +{ + u64 root_size, size, start; + unsigned int order; + int i; + + size = mm->size; + + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); + + if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i]))) + kunit_fail_current_test("buddy_fini() root"); + + drm_block_free(mm, mm->roots[i]); + + root_size = mm->chunk_size << order; + size -= root_size; + } + + WARN_ON(mm->avail != mm->size); + + for_each_free_tree(i) + kfree(mm->free_trees[i]); + kfree(mm->free_trees); + kfree(mm->roots); +} +EXPORT_SYMBOL(drm_buddy_fini); + +static int split_block(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + unsigned int block_order = drm_buddy_block_order(block) - 1; + u64 offset = drm_buddy_block_offset(block); + + BUG_ON(!drm_buddy_block_is_free(block)); + BUG_ON(!drm_buddy_block_order(block)); + + block->left = drm_block_alloc(mm, block, block_order, offset); + if (!block->left) + return -ENOMEM; + + block->right = 
drm_block_alloc(mm, block, block_order, + offset + (mm->chunk_size << block_order)); + if (!block->right) { + drm_block_free(mm, block->left); + return -ENOMEM; + } + + mark_split(mm, block); + + if (drm_buddy_block_is_clear(block)) { + mark_cleared(block->left); + mark_cleared(block->right); + clear_reset(block); + } + + mark_free(mm, block->left); + mark_free(mm, block->right); + + return 0; +} + +/** + * drm_get_buddy - get buddy address + * + * @block: DRM buddy block + * + * Returns the corresponding buddy block for @block, or NULL + * if this is a root block and can't be merged further. + * Requires some kind of locking to protect against + * any concurrent allocate and free operations. + */ +struct drm_buddy_block * +drm_get_buddy(struct drm_buddy_block *block) +{ + return __get_buddy(block); +} +EXPORT_SYMBOL(drm_get_buddy); + +/** + * drm_buddy_reset_clear - reset blocks clear state + * + * @mm: DRM buddy manager + * @is_clear: blocks clear state + * + * Reset the clear state based on @is_clear value for each block + * in the freetree. + */ +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) +{ + enum drm_buddy_free_tree src_tree, dst_tree; + u64 root_size, size, start; + unsigned int order; + int i; + + size = mm->size; + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); + + root_size = mm->chunk_size << order; + size -= root_size; + } + + src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; + dst_tree = is_clear ? 
DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; + + for (i = 0; i <= mm->max_order; ++i) { + struct rb_root *root = &mm->free_trees[src_tree][i]; + struct drm_buddy_block *block, *tmp; + + rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { + rbtree_remove(mm, block); + if (is_clear) { + mark_cleared(block); + mm->clear_avail += drm_buddy_block_size(mm, block); + } else { + clear_reset(block); + mm->clear_avail -= drm_buddy_block_size(mm, block); + } + + rbtree_insert(mm, block, dst_tree); + } + } +} +EXPORT_SYMBOL(drm_buddy_reset_clear); + +/** + * drm_buddy_free_block - free a block + * + * @mm: DRM buddy manager + * @block: block to be freed + */ +void drm_buddy_free_block(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + BUG_ON(!drm_buddy_block_is_allocated(block)); + mm->avail += drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail += drm_buddy_block_size(mm, block); + + __drm_buddy_free(mm, block, false); +} +EXPORT_SYMBOL(drm_buddy_free_block); + +static void __drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + bool mark_clear, + bool mark_dirty) +{ + struct drm_buddy_block *block, *on; + + WARN_ON(mark_dirty && mark_clear); + + list_for_each_entry_safe(block, on, objects, link) { + if (mark_clear) + mark_cleared(block); + else if (mark_dirty) + clear_reset(block); + drm_buddy_free_block(mm, block); + cond_resched(); + } + INIT_LIST_HEAD(objects); +} + +static void drm_buddy_free_list_internal(struct drm_buddy *mm, + struct list_head *objects) +{ + /* + * Don't touch the clear/dirty bit, since allocation is still internal + * at this point. For example we might have just failed part of the + * allocation. 
+ */ + __drm_buddy_free_list(mm, objects, false, false); +} + +/** + * drm_buddy_free_list - free blocks + * + * @mm: DRM buddy manager + * @objects: input list head to free blocks + * @flags: optional flags like DRM_BUDDY_CLEARED + */ +void drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + unsigned int flags) +{ + bool mark_clear = flags & DRM_BUDDY_CLEARED; + + __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear); +} +EXPORT_SYMBOL(drm_buddy_free_list); + +static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags) +{ + bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION; + + return needs_clear != drm_buddy_block_is_clear(block); +} + +static struct drm_buddy_block * +__alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags, + bool fallback) +{ + u64 req_size = mm->chunk_size << order; + struct drm_buddy_block *block; + struct drm_buddy_block *buddy; + LIST_HEAD(dfs); + int err; + int i; + + end = end - 1; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct drm_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + if (drm_buddy_block_order(block) < order) + continue; + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (drm_buddy_block_is_allocated(block)) + continue; + + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + + if (!fallback && block_incompatible(block, flags)) + continue; + + if (contains(start, end, block_start, block_end) && + order == drm_buddy_block_order(block)) { 
+ /* + * Find the free block within the range. + */ + if (drm_buddy_block_is_free(block)) + return block; + + continue; + } + + if (!drm_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + return ERR_PTR(-ENOSPC); + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. + */ + buddy = __get_buddy(block); + if (buddy && + (drm_buddy_block_is_free(block) && + drm_buddy_block_is_free(buddy))) + __drm_buddy_free(mm, block, false); + return ERR_PTR(err); +} + +static struct drm_buddy_block * +__drm_buddy_alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags) +{ + struct drm_buddy_block *block; + bool fallback = false; + + block = __alloc_range_bias(mm, start, end, order, + flags, fallback); + if (IS_ERR(block)) + return __alloc_range_bias(mm, start, end, order, + flags, !fallback); + + return block; +} + +static struct drm_buddy_block * +get_maxblock(struct drm_buddy *mm, + unsigned int order, + enum drm_buddy_free_tree tree) +{ + struct drm_buddy_block *max_block = NULL, *block = NULL; + struct rb_root *root; + unsigned int i; + + for (i = order; i <= mm->max_order; ++i) { + root = &mm->free_trees[tree][i]; + block = rbtree_last_free_block(root); + if (!block) + continue; + + if (!max_block) { + max_block = block; + continue; + } + + if (drm_buddy_block_offset(block) > + drm_buddy_block_offset(max_block)) { + max_block = block; + } + } + + return max_block; +} + +static struct drm_buddy_block * +alloc_from_freetree(struct drm_buddy *mm, + unsigned int order, + unsigned long flags) +{ + struct drm_buddy_block *block = NULL; + struct rb_root *root; + enum drm_buddy_free_tree tree; + unsigned int tmp; + int err; + + tree = (flags & 
DRM_BUDDY_CLEAR_ALLOCATION) ? + DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; + + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(mm, order, tree); + if (block) + /* Store the obtained block order */ + tmp = drm_buddy_block_order(block); + } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + /* Get RB tree root for this order and tree */ + root = &mm->free_trees[tree][tmp]; + block = rbtree_last_free_block(root); + if (block) + break; + } + } + + if (!block) { + /* Try allocating from the other tree */ + tree = (tree == DRM_BUDDY_CLEAR_TREE) ? + DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; + + for (tmp = order; tmp <= mm->max_order; ++tmp) { + root = &mm->free_trees[tree][tmp]; + block = rbtree_last_free_block(root); + if (block) + break; + } + + if (!block) + return ERR_PTR(-ENOSPC); + } + + BUG_ON(!drm_buddy_block_is_free(block)); + + while (tmp != order) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + + block = block->right; + tmp--; + } + return block; + +err_undo: + if (tmp != order) + __drm_buddy_free(mm, block, false); + return ERR_PTR(err); +} + +static int __alloc_range(struct drm_buddy *mm, + struct list_head *dfs, + u64 start, u64 size, + struct list_head *blocks, + u64 *total_allocated_on_err) +{ + struct drm_buddy_block *block; + struct drm_buddy_block *buddy; + u64 total_allocated = 0; + LIST_HEAD(allocated); + u64 end; + int err; + + end = start + size - 1; + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(dfs, + struct drm_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (drm_buddy_block_is_allocated(block)) { + err = -ENOSPC; + goto err_free; + } + + if (contains(start, end, block_start, block_end)) { + if (drm_buddy_block_is_free(block)) { + 
mark_allocated(mm, block); + total_allocated += drm_buddy_block_size(mm, block); + mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + list_add_tail(&block->link, &allocated); + continue; + } else if (!mm->clear_avail) { + err = -ENOSPC; + goto err_free; + } + } + + if (!drm_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, dfs); + list_add(&block->left->tmp_link, dfs); + } while (1); + + if (total_allocated < size) { + err = -ENOSPC; + goto err_free; + } + + list_splice_tail(&allocated, blocks); + + return 0; + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. + */ + buddy = __get_buddy(block); + if (buddy && + (drm_buddy_block_is_free(block) && + drm_buddy_block_is_free(buddy))) + __drm_buddy_free(mm, block, false); + +err_free: + if (err == -ENOSPC && total_allocated_on_err) { + list_splice_tail(&allocated, blocks); + *total_allocated_on_err = total_allocated; + } else { + drm_buddy_free_list_internal(mm, &allocated); + } + + return err; +} + +static int __drm_buddy_alloc_range(struct drm_buddy *mm, + u64 start, + u64 size, + u64 *total_allocated_on_err, + struct list_head *blocks) +{ + LIST_HEAD(dfs); + int i; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + return __alloc_range(mm, &dfs, start, size, + blocks, total_allocated_on_err); +} + +static int __alloc_contig_try_harder(struct drm_buddy *mm, + u64 size, + u64 min_block_size, + struct list_head *blocks) +{ + u64 rhs_offset, lhs_offset, lhs_size, filled; + struct drm_buddy_block *block; + unsigned int tree, order; + LIST_HEAD(blocks_lhs); + unsigned long pages; + u64 modify_size; + int err; + + modify_size = rounddown_pow_of_two(size); + pages = 
modify_size >> ilog2(mm->chunk_size); + order = fls(pages) - 1; + if (order == 0) + return -ENOSPC; + + for_each_free_tree(tree) { + struct rb_root *root; + struct rb_node *iter; + + root = &mm->free_trees[tree][order]; + if (rbtree_is_empty(root)) + continue; + + iter = rb_last(root); + while (iter) { + block = rbtree_get_free_block(iter); + + /* Allocate blocks traversing RHS */ + rhs_offset = drm_buddy_block_offset(block); + err = __drm_buddy_alloc_range(mm, rhs_offset, size, + &filled, blocks); + if (!err || err != -ENOSPC) + return err; + + lhs_size = max((size - filled), min_block_size); + if (!IS_ALIGNED(lhs_size, min_block_size)) + lhs_size = round_up(lhs_size, min_block_size); + + /* Allocate blocks traversing LHS */ + lhs_offset = drm_buddy_block_offset(block) - lhs_size; + err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size, + NULL, &blocks_lhs); + if (!err) { + list_splice(&blocks_lhs, blocks); + return 0; + } else if (err != -ENOSPC) { + drm_buddy_free_list_internal(mm, blocks); + return err; + } + /* Free blocks for the next iteration */ + drm_buddy_free_list_internal(mm, blocks); + + iter = rb_prev(iter); + } + } + + return -ENOSPC; +} + +/** + * drm_buddy_block_trim - free unused pages + * + * @mm: DRM buddy manager + * @start: start address to begin the trimming. + * @new_size: original size requested + * @blocks: Input and output list of allocated blocks. + * MUST contain single block as input to be trimmed. + * On success will contain the newly allocated blocks + * making up the @new_size. Blocks always appear in + * ascending order + * + * For contiguous allocation, we round up the size to the nearest + * power of two value, drivers consume *actual* size, so remaining + * portions are unused and can be optionally freed with this function + * + * Returns: + * 0 on success, error code on failure. 
+ */ +int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, + u64 new_size, + struct list_head *blocks) +{ + struct drm_buddy_block *parent; + struct drm_buddy_block *block; + u64 block_start, block_end; + LIST_HEAD(dfs); + u64 new_start; + int err; + + if (!list_is_singular(blocks)) + return -EINVAL; + + block = list_first_entry(blocks, + struct drm_buddy_block, + link); + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + + if (WARN_ON(!drm_buddy_block_is_allocated(block))) + return -EINVAL; + + if (new_size > drm_buddy_block_size(mm, block)) + return -EINVAL; + + if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) + return -EINVAL; + + if (new_size == drm_buddy_block_size(mm, block)) + return 0; + + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + + list_del(&block->link); + mark_free(mm, block); + mm->avail += drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail += drm_buddy_block_size(mm, block); + + /* Prevent recursively freeing this node */ + parent = block->parent; + block->parent = NULL; + + list_add(&block->tmp_link, &dfs); + err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); + if (err) { + mark_allocated(mm, block); + mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + list_add(&block->link, blocks); + } + + block->parent = parent; + return err; +} +EXPORT_SYMBOL(drm_buddy_block_trim); + +static struct drm_buddy_block * +__drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags) +{ + if (flags & DRM_BUDDY_RANGE_ALLOCATION) + /* Allocate traversing within the range */ + return 
__drm_buddy_alloc_range_bias(mm, start, end, + order, flags); + else + /* Allocate from freetree */ + return alloc_from_freetree(mm, order, flags); +} + +/** + * drm_buddy_alloc_blocks - allocate power-of-two blocks + * + * @mm: DRM buddy manager to allocate from + * @start: start of the allowed range for this block + * @end: end of the allowed range for this block + * @size: size of the allocation in bytes + * @min_block_size: alignment of the allocation + * @blocks: output list head to add allocated blocks + * @flags: DRM_BUDDY_*_ALLOCATION flags + * + * alloc_range_bias() called on range limitations, which traverses + * the tree and returns the desired block. + * + * alloc_from_freetree() called when *no* range restrictions + * are enforced, which picks the block from the freetree. + * + * Returns: + * 0 on success, error code on failure. + */ +int drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_block_size, + struct list_head *blocks, + unsigned long flags) +{ + struct drm_buddy_block *block = NULL; + u64 original_size, original_min_size; + unsigned int min_order, order; + LIST_HEAD(allocated); + unsigned long pages; + int err; + + if (size < mm->chunk_size) + return -EINVAL; + + if (min_block_size < mm->chunk_size) + return -EINVAL; + + if (!is_power_of_2(min_block_size)) + return -EINVAL; + + if (!IS_ALIGNED(start | end | size, mm->chunk_size)) + return -EINVAL; + + if (end > mm->size) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + /* Actual range allocation */ + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } + + original_size = size; + original_min_size = min_block_size; + + /* Roundup the size to power of 2 */ + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) { + size = roundup_pow_of_two(size); + min_block_size = size; + /* Align size value to min_block_size */ + } 
else if (!IS_ALIGNED(size, min_block_size)) { + size = round_up(size, min_block_size); + } + + pages = size >> ilog2(mm->chunk_size); + order = fls(pages) - 1; + min_order = ilog2(min_block_size) - ilog2(mm->chunk_size); + + if (order > mm->max_order || size > mm->size) { + if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) && + !(flags & DRM_BUDDY_RANGE_ALLOCATION)) + return __alloc_contig_try_harder(mm, original_size, + original_min_size, blocks); + + return -EINVAL; + } + + do { + order = min(order, (unsigned int)fls(pages) - 1); + BUG_ON(order > mm->max_order); + BUG_ON(order < min_order); + + do { + block = __drm_buddy_alloc_blocks(mm, start, + end, + order, + flags); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) { + /* Try allocation through force merge method */ + if (mm->clear_avail && + !__force_merge(mm, start, end, min_order)) { + block = __drm_buddy_alloc_blocks(mm, start, + end, + min_order, + flags); + if (!IS_ERR(block)) { + order = min_order; + break; + } + } + + /* + * Try contiguous block allocation through + * try harder method. 
+ */ + if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION && + !(flags & DRM_BUDDY_RANGE_ALLOCATION)) + return __alloc_contig_try_harder(mm, + original_size, + original_min_size, + blocks); + err = -ENOSPC; + goto err_free; + } + } while (1); + + mark_allocated(mm, block); + mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + kmemleak_update_trace(block); + list_add_tail(&block->link, &allocated); + + pages -= BIT(order); + + if (!pages) + break; + } while (1); + + /* Trim the allocated block to the required size */ + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { + struct list_head *trim_list; + LIST_HEAD(temp); + u64 trim_size; + + trim_list = &allocated; + trim_size = original_size; + + if (!list_is_singular(&allocated)) { + block = list_last_entry(&allocated, typeof(*block), link); + list_move(&block->link, &temp); + trim_list = &temp; + trim_size = drm_buddy_block_size(mm, block) - + (size - original_size); + } + + drm_buddy_block_trim(mm, + NULL, + trim_size, + trim_list); + + if (!list_empty(&temp)) + list_splice_tail(trim_list, &allocated); + } + + list_splice_tail(&allocated, blocks); + return 0; + +err_free: + drm_buddy_free_list_internal(mm, &allocated); + return err; +} +EXPORT_SYMBOL(drm_buddy_alloc_blocks); + +/** + * drm_buddy_block_print - print block information + * + * @mm: DRM buddy manager + * @block: DRM buddy block + * @p: DRM printer to use + */ +void drm_buddy_block_print(struct drm_buddy *mm, + struct drm_buddy_block *block, + struct drm_printer *p) +{ + u64 start = drm_buddy_block_offset(block); + u64 size = drm_buddy_block_size(mm, block); + + drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); +} +EXPORT_SYMBOL(drm_buddy_block_print); + +/** + * drm_buddy_print - print allocator state + * + * @mm: DRM buddy manager + * @p: DRM printer to use + */ +void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p) +{ + 
int order; + + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); + + for (order = mm->max_order; order >= 0; order--) { + struct drm_buddy_block *block, *tmp; + struct rb_root *root; + u64 count = 0, free; + unsigned int tree; + + for_each_free_tree(tree) { + root = &mm->free_trees[tree][order]; + + rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { + BUG_ON(!drm_buddy_block_is_free(block)); + count++; + } + } + + drm_printf(p, "order-%2d ", order); + + free = count * (mm->chunk_size << order); + if (free < SZ_1M) + drm_printf(p, "free: %8llu KiB", free >> 10); + else + drm_printf(p, "free: %8llu MiB", free >> 20); + + drm_printf(p, ", blocks: %llu\n", count); + } +} +EXPORT_SYMBOL(drm_buddy_print); + +static void drm_buddy_module_exit(void) +{ + kmem_cache_destroy(slab_blocks); +} + +static int __init drm_buddy_module_init(void) +{ + slab_blocks = KMEM_CACHE(drm_buddy_block, 0); + if (!slab_blocks) + return -ENOMEM; + + return 0; +} + +module_init(drm_buddy_module_init); +module_exit(drm_buddy_module_exit); + +MODULE_DESCRIPTION("DRM Buddy Allocator"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5888eb147ed1..862ff4000969 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -269,10 +269,6 @@ config DRM_SCHED config DRM_PANEL_BACKLIGHT_QUIRKS tristate -config DRM_LIB_RANDOM - bool - default n - config DRM_PRIVACY_SCREEN bool default n diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug index 05dc43c0b8c5..3b7886865335 100644 --- a/drivers/gpu/drm/Kconfig.debug +++ b/drivers/gpu/drm/Kconfig.debug @@ -69,7 +69,6 @@ config DRM_KUNIT_TEST select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER select DRM_KUNIT_TEST_HELPERS - select DRM_LIB_RANDOM select DRM_SYSFB_HELPER select PRIME_NUMBERS default KUNIT_ALL_TESTS diff --git 
a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 75840ec4d782..892859cfe95f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -79,7 +79,6 @@ drm-$(CONFIG_DRM_CLIENT) += \ drm_client_event.o \ drm_client_modeset.o \ drm_client_sysrq.o -drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_PANEL) += drm_panel.o drm-$(CONFIG_OF) += drm_of.o @@ -115,7 +114,7 @@ drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \ obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o -obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o +obj-$(CONFIG_DRM_BUDDY) += ../buddy.o drm_dma_helper-y := drm_gem_dma_helper.o drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 5f5fd9a911c2..874779618056 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -24,7 +24,7 @@ #ifndef __AMDGPU_VRAM_MGR_H__ #define __AMDGPU_VRAM_MGR_H__ -#include +#include struct amdgpu_vram_mgr { struct ttm_resource_manager manager; diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c deleted file mode 100644 index fd34d3755f7c..000000000000 --- a/drivers/gpu/drm/drm_buddy.c +++ /dev/null @@ -1,1336 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include - -#include -#include -#include -#include - -#include -#include - -enum drm_buddy_free_tree { - DRM_BUDDY_CLEAR_TREE = 0, - DRM_BUDDY_DIRTY_TREE, - DRM_BUDDY_MAX_FREE_TREES, -}; - -static struct kmem_cache *slab_blocks; - -#define for_each_free_tree(tree) \ - for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++) - -static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm, - struct drm_buddy_block *parent, - unsigned int order, - u64 offset) -{ - struct drm_buddy_block *block; - - BUG_ON(order > DRM_BUDDY_MAX_ORDER); - - block = kmem_cache_zalloc(slab_blocks, 
GFP_KERNEL); - if (!block) - return NULL; - - block->header = offset; - block->header |= order; - block->parent = parent; - - RB_CLEAR_NODE(&block->rb); - - BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED); - return block; -} - -static void drm_block_free(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - kmem_cache_free(slab_blocks, block); -} - -static enum drm_buddy_free_tree -get_block_tree(struct drm_buddy_block *block) -{ - return drm_buddy_block_is_clear(block) ? - DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; -} - -static struct drm_buddy_block * -rbtree_get_free_block(const struct rb_node *node) -{ - return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL; -} - -static struct drm_buddy_block * -rbtree_last_free_block(struct rb_root *root) -{ - return rbtree_get_free_block(rb_last(root)); -} - -static bool rbtree_is_empty(struct rb_root *root) -{ - return RB_EMPTY_ROOT(root); -} - -static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block, - const struct drm_buddy_block *node) -{ - return drm_buddy_block_offset(block) < drm_buddy_block_offset(node); -} - -static bool rbtree_block_offset_less(struct rb_node *block, - const struct rb_node *node) -{ - return drm_buddy_block_offset_less(rbtree_get_free_block(block), - rbtree_get_free_block(node)); -} - -static void rbtree_insert(struct drm_buddy *mm, - struct drm_buddy_block *block, - enum drm_buddy_free_tree tree) -{ - rb_add(&block->rb, - &mm->free_trees[tree][drm_buddy_block_order(block)], - rbtree_block_offset_less); -} - -static void rbtree_remove(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - unsigned int order = drm_buddy_block_order(block); - enum drm_buddy_free_tree tree; - struct rb_root *root; - - tree = get_block_tree(block); - root = &mm->free_trees[tree][order]; - - rb_erase(&block->rb, root); - RB_CLEAR_NODE(&block->rb); -} - -static void clear_reset(struct drm_buddy_block *block) -{ - block->header &= ~DRM_BUDDY_HEADER_CLEAR; -} - -static void 
mark_cleared(struct drm_buddy_block *block) -{ - block->header |= DRM_BUDDY_HEADER_CLEAR; -} - -static void mark_allocated(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_ALLOCATED; - - rbtree_remove(mm, block); -} - -static void mark_free(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - enum drm_buddy_free_tree tree; - - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_FREE; - - tree = get_block_tree(block); - rbtree_insert(mm, block, tree); -} - -static void mark_split(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_SPLIT; - - rbtree_remove(mm, block); -} - -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= e2 && e1 >= s2; -} - -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) -{ - return s1 <= s2 && e1 >= e2; -} - -static struct drm_buddy_block * -__get_buddy(struct drm_buddy_block *block) -{ - struct drm_buddy_block *parent; - - parent = block->parent; - if (!parent) - return NULL; - - if (parent->left == block) - return parent->right; - - return parent->left; -} - -static unsigned int __drm_buddy_free(struct drm_buddy *mm, - struct drm_buddy_block *block, - bool force_merge) -{ - struct drm_buddy_block *parent; - unsigned int order; - - while ((parent = block->parent)) { - struct drm_buddy_block *buddy; - - buddy = __get_buddy(block); - - if (!drm_buddy_block_is_free(buddy)) - break; - - if (!force_merge) { - /* - * Check the block and its buddy clear state and exit - * the loop if they both have the dissimilar state. 
- */ - if (drm_buddy_block_is_clear(block) != - drm_buddy_block_is_clear(buddy)) - break; - - if (drm_buddy_block_is_clear(block)) - mark_cleared(parent); - } - - rbtree_remove(mm, buddy); - if (force_merge && drm_buddy_block_is_clear(buddy)) - mm->clear_avail -= drm_buddy_block_size(mm, buddy); - - drm_block_free(mm, block); - drm_block_free(mm, buddy); - - block = parent; - } - - order = drm_buddy_block_order(block); - mark_free(mm, block); - - return order; -} - -static int __force_merge(struct drm_buddy *mm, - u64 start, - u64 end, - unsigned int min_order) -{ - unsigned int tree, order; - int i; - - if (!min_order) - return -ENOMEM; - - if (min_order > mm->max_order) - return -EINVAL; - - for_each_free_tree(tree) { - for (i = min_order - 1; i >= 0; i--) { - struct rb_node *iter = rb_last(&mm->free_trees[tree][i]); - - while (iter) { - struct drm_buddy_block *block, *buddy; - u64 block_start, block_end; - - block = rbtree_get_free_block(iter); - iter = rb_prev(iter); - - if (!block || !block->parent) - continue; - - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; - - if (!contains(start, end, block_start, block_end)) - continue; - - buddy = __get_buddy(block); - if (!drm_buddy_block_is_free(buddy)) - continue; - - WARN_ON(drm_buddy_block_is_clear(block) == - drm_buddy_block_is_clear(buddy)); - - /* - * Advance to the next node when the current node is the buddy, - * as freeing the block will also remove its buddy from the tree. 
- */ - if (iter == &buddy->rb) - iter = rb_prev(iter); - - rbtree_remove(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); - - order = __drm_buddy_free(mm, block, true); - if (order >= min_order) - return 0; - } - } - } - - return -ENOMEM; -} - -/** - * drm_buddy_init - init memory manager - * - * @mm: DRM buddy manager to initialize - * @size: size in bytes to manage - * @chunk_size: minimum page size in bytes for our allocations - * - * Initializes the memory manager and its resources. - * - * Returns: - * 0 on success, error code on failure. - */ -int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) -{ - unsigned int i, j, root_count = 0; - u64 offset = 0; - - if (size < chunk_size) - return -EINVAL; - - if (chunk_size < SZ_4K) - return -EINVAL; - - if (!is_power_of_2(chunk_size)) - return -EINVAL; - - size = round_down(size, chunk_size); - - mm->size = size; - mm->avail = size; - mm->clear_avail = 0; - mm->chunk_size = chunk_size; - mm->max_order = ilog2(size) - ilog2(chunk_size); - - BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER); - - mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES, - sizeof(*mm->free_trees), - GFP_KERNEL); - if (!mm->free_trees) - return -ENOMEM; - - for_each_free_tree(i) { - mm->free_trees[i] = kmalloc_array(mm->max_order + 1, - sizeof(struct rb_root), - GFP_KERNEL); - if (!mm->free_trees[i]) - goto out_free_tree; - - for (j = 0; j <= mm->max_order; ++j) - mm->free_trees[i][j] = RB_ROOT; - } - - mm->n_roots = hweight64(size); - - mm->roots = kmalloc_array(mm->n_roots, - sizeof(struct drm_buddy_block *), - GFP_KERNEL); - if (!mm->roots) - goto out_free_tree; - - /* - * Split into power-of-two blocks, in case we are given a size that is - * not itself a power-of-two. 
- */ - do { - struct drm_buddy_block *root; - unsigned int order; - u64 root_size; - - order = ilog2(size) - ilog2(chunk_size); - root_size = chunk_size << order; - - root = drm_block_alloc(mm, NULL, order, offset); - if (!root) - goto out_free_roots; - - mark_free(mm, root); - - BUG_ON(root_count > mm->max_order); - BUG_ON(drm_buddy_block_size(mm, root) < chunk_size); - - mm->roots[root_count] = root; - - offset += root_size; - size -= root_size; - root_count++; - } while (size); - - return 0; - -out_free_roots: - while (root_count--) - drm_block_free(mm, mm->roots[root_count]); - kfree(mm->roots); -out_free_tree: - while (i--) - kfree(mm->free_trees[i]); - kfree(mm->free_trees); - return -ENOMEM; -} -EXPORT_SYMBOL(drm_buddy_init); - -/** - * drm_buddy_fini - tear down the memory manager - * - * @mm: DRM buddy manager to free - * - * Cleanup memory manager resources and the freetree - */ -void drm_buddy_fini(struct drm_buddy *mm) -{ - u64 root_size, size, start; - unsigned int order; - int i; - - size = mm->size; - - for (i = 0; i < mm->n_roots; ++i) { - order = ilog2(size) - ilog2(mm->chunk_size); - start = drm_buddy_block_offset(mm->roots[i]); - __force_merge(mm, start, start + size, order); - - if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i]))) - kunit_fail_current_test("buddy_fini() root"); - - drm_block_free(mm, mm->roots[i]); - - root_size = mm->chunk_size << order; - size -= root_size; - } - - WARN_ON(mm->avail != mm->size); - - for_each_free_tree(i) - kfree(mm->free_trees[i]); - kfree(mm->free_trees); - kfree(mm->roots); -} -EXPORT_SYMBOL(drm_buddy_fini); - -static int split_block(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - unsigned int block_order = drm_buddy_block_order(block) - 1; - u64 offset = drm_buddy_block_offset(block); - - BUG_ON(!drm_buddy_block_is_free(block)); - BUG_ON(!drm_buddy_block_order(block)); - - block->left = drm_block_alloc(mm, block, block_order, offset); - if (!block->left) - return -ENOMEM; - - block->right = 
drm_block_alloc(mm, block, block_order, - offset + (mm->chunk_size << block_order)); - if (!block->right) { - drm_block_free(mm, block->left); - return -ENOMEM; - } - - mark_split(mm, block); - - if (drm_buddy_block_is_clear(block)) { - mark_cleared(block->left); - mark_cleared(block->right); - clear_reset(block); - } - - mark_free(mm, block->left); - mark_free(mm, block->right); - - return 0; -} - -/** - * drm_get_buddy - get buddy address - * - * @block: DRM buddy block - * - * Returns the corresponding buddy block for @block, or NULL - * if this is a root block and can't be merged further. - * Requires some kind of locking to protect against - * any concurrent allocate and free operations. - */ -struct drm_buddy_block * -drm_get_buddy(struct drm_buddy_block *block) -{ - return __get_buddy(block); -} -EXPORT_SYMBOL(drm_get_buddy); - -/** - * drm_buddy_reset_clear - reset blocks clear state - * - * @mm: DRM buddy manager - * @is_clear: blocks clear state - * - * Reset the clear state based on @is_clear value for each block - * in the freetree. - */ -void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) -{ - enum drm_buddy_free_tree src_tree, dst_tree; - u64 root_size, size, start; - unsigned int order; - int i; - - size = mm->size; - for (i = 0; i < mm->n_roots; ++i) { - order = ilog2(size) - ilog2(mm->chunk_size); - start = drm_buddy_block_offset(mm->roots[i]); - __force_merge(mm, start, start + size, order); - - root_size = mm->chunk_size << order; - size -= root_size; - } - - src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; - dst_tree = is_clear ? 
DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; - - for (i = 0; i <= mm->max_order; ++i) { - struct rb_root *root = &mm->free_trees[src_tree][i]; - struct drm_buddy_block *block, *tmp; - - rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { - rbtree_remove(mm, block); - if (is_clear) { - mark_cleared(block); - mm->clear_avail += drm_buddy_block_size(mm, block); - } else { - clear_reset(block); - mm->clear_avail -= drm_buddy_block_size(mm, block); - } - - rbtree_insert(mm, block, dst_tree); - } - } -} -EXPORT_SYMBOL(drm_buddy_reset_clear); - -/** - * drm_buddy_free_block - free a block - * - * @mm: DRM buddy manager - * @block: block to be freed - */ -void drm_buddy_free_block(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - BUG_ON(!drm_buddy_block_is_allocated(block)); - mm->avail += drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail += drm_buddy_block_size(mm, block); - - __drm_buddy_free(mm, block, false); -} -EXPORT_SYMBOL(drm_buddy_free_block); - -static void __drm_buddy_free_list(struct drm_buddy *mm, - struct list_head *objects, - bool mark_clear, - bool mark_dirty) -{ - struct drm_buddy_block *block, *on; - - WARN_ON(mark_dirty && mark_clear); - - list_for_each_entry_safe(block, on, objects, link) { - if (mark_clear) - mark_cleared(block); - else if (mark_dirty) - clear_reset(block); - drm_buddy_free_block(mm, block); - cond_resched(); - } - INIT_LIST_HEAD(objects); -} - -static void drm_buddy_free_list_internal(struct drm_buddy *mm, - struct list_head *objects) -{ - /* - * Don't touch the clear/dirty bit, since allocation is still internal - * at this point. For example we might have just failed part of the - * allocation. 
- */ - __drm_buddy_free_list(mm, objects, false, false); -} - -/** - * drm_buddy_free_list - free blocks - * - * @mm: DRM buddy manager - * @objects: input list head to free blocks - * @flags: optional flags like DRM_BUDDY_CLEARED - */ -void drm_buddy_free_list(struct drm_buddy *mm, - struct list_head *objects, - unsigned int flags) -{ - bool mark_clear = flags & DRM_BUDDY_CLEARED; - - __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear); -} -EXPORT_SYMBOL(drm_buddy_free_list); - -static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags) -{ - bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION; - - return needs_clear != drm_buddy_block_is_clear(block); -} - -static struct drm_buddy_block * -__alloc_range_bias(struct drm_buddy *mm, - u64 start, u64 end, - unsigned int order, - unsigned long flags, - bool fallback) -{ - u64 req_size = mm->chunk_size << order; - struct drm_buddy_block *block; - struct drm_buddy_block *buddy; - LIST_HEAD(dfs); - int err; - int i; - - end = end - 1; - - for (i = 0; i < mm->n_roots; ++i) - list_add_tail(&mm->roots[i]->tmp_link, &dfs); - - do { - u64 block_start; - u64 block_end; - - block = list_first_entry_or_null(&dfs, - struct drm_buddy_block, - tmp_link); - if (!block) - break; - - list_del(&block->tmp_link); - - if (drm_buddy_block_order(block) < order) - continue; - - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; - - if (!overlaps(start, end, block_start, block_end)) - continue; - - if (drm_buddy_block_is_allocated(block)) - continue; - - if (block_start < start || block_end > end) { - u64 adjusted_start = max(block_start, start); - u64 adjusted_end = min(block_end, end); - - if (round_down(adjusted_end + 1, req_size) <= - round_up(adjusted_start, req_size)) - continue; - } - - if (!fallback && block_incompatible(block, flags)) - continue; - - if (contains(start, end, block_start, block_end) && - order == drm_buddy_block_order(block)) { 
- /* - * Find the free block within the range. - */ - if (drm_buddy_block_is_free(block)) - return block; - - continue; - } - - if (!drm_buddy_block_is_split(block)) { - err = split_block(mm, block); - if (unlikely(err)) - goto err_undo; - } - - list_add(&block->right->tmp_link, &dfs); - list_add(&block->left->tmp_link, &dfs); - } while (1); - - return ERR_PTR(-ENOSPC); - -err_undo: - /* - * We really don't want to leave around a bunch of split blocks, since - * bigger is better, so make sure we merge everything back before we - * free the allocated blocks. - */ - buddy = __get_buddy(block); - if (buddy && - (drm_buddy_block_is_free(block) && - drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block, false); - return ERR_PTR(err); -} - -static struct drm_buddy_block * -__drm_buddy_alloc_range_bias(struct drm_buddy *mm, - u64 start, u64 end, - unsigned int order, - unsigned long flags) -{ - struct drm_buddy_block *block; - bool fallback = false; - - block = __alloc_range_bias(mm, start, end, order, - flags, fallback); - if (IS_ERR(block)) - return __alloc_range_bias(mm, start, end, order, - flags, !fallback); - - return block; -} - -static struct drm_buddy_block * -get_maxblock(struct drm_buddy *mm, - unsigned int order, - enum drm_buddy_free_tree tree) -{ - struct drm_buddy_block *max_block = NULL, *block = NULL; - struct rb_root *root; - unsigned int i; - - for (i = order; i <= mm->max_order; ++i) { - root = &mm->free_trees[tree][i]; - block = rbtree_last_free_block(root); - if (!block) - continue; - - if (!max_block) { - max_block = block; - continue; - } - - if (drm_buddy_block_offset(block) > - drm_buddy_block_offset(max_block)) { - max_block = block; - } - } - - return max_block; -} - -static struct drm_buddy_block * -alloc_from_freetree(struct drm_buddy *mm, - unsigned int order, - unsigned long flags) -{ - struct drm_buddy_block *block = NULL; - struct rb_root *root; - enum drm_buddy_free_tree tree; - unsigned int tmp; - int err; - - tree = (flags & 
DRM_BUDDY_CLEAR_ALLOCATION) ? - DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; - - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(mm, order, tree); - if (block) - /* Store the obtained block order */ - tmp = drm_buddy_block_order(block); - } else { - for (tmp = order; tmp <= mm->max_order; ++tmp) { - /* Get RB tree root for this order and tree */ - root = &mm->free_trees[tree][tmp]; - block = rbtree_last_free_block(root); - if (block) - break; - } - } - - if (!block) { - /* Try allocating from the other tree */ - tree = (tree == DRM_BUDDY_CLEAR_TREE) ? - DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; - - for (tmp = order; tmp <= mm->max_order; ++tmp) { - root = &mm->free_trees[tree][tmp]; - block = rbtree_last_free_block(root); - if (block) - break; - } - - if (!block) - return ERR_PTR(-ENOSPC); - } - - BUG_ON(!drm_buddy_block_is_free(block)); - - while (tmp != order) { - err = split_block(mm, block); - if (unlikely(err)) - goto err_undo; - - block = block->right; - tmp--; - } - return block; - -err_undo: - if (tmp != order) - __drm_buddy_free(mm, block, false); - return ERR_PTR(err); -} - -static int __alloc_range(struct drm_buddy *mm, - struct list_head *dfs, - u64 start, u64 size, - struct list_head *blocks, - u64 *total_allocated_on_err) -{ - struct drm_buddy_block *block; - struct drm_buddy_block *buddy; - u64 total_allocated = 0; - LIST_HEAD(allocated); - u64 end; - int err; - - end = start + size - 1; - - do { - u64 block_start; - u64 block_end; - - block = list_first_entry_or_null(dfs, - struct drm_buddy_block, - tmp_link); - if (!block) - break; - - list_del(&block->tmp_link); - - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; - - if (!overlaps(start, end, block_start, block_end)) - continue; - - if (drm_buddy_block_is_allocated(block)) { - err = -ENOSPC; - goto err_free; - } - - if (contains(start, end, block_start, block_end)) { - if (drm_buddy_block_is_free(block)) { - 
mark_allocated(mm, block); - total_allocated += drm_buddy_block_size(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); - list_add_tail(&block->link, &allocated); - continue; - } else if (!mm->clear_avail) { - err = -ENOSPC; - goto err_free; - } - } - - if (!drm_buddy_block_is_split(block)) { - err = split_block(mm, block); - if (unlikely(err)) - goto err_undo; - } - - list_add(&block->right->tmp_link, dfs); - list_add(&block->left->tmp_link, dfs); - } while (1); - - if (total_allocated < size) { - err = -ENOSPC; - goto err_free; - } - - list_splice_tail(&allocated, blocks); - - return 0; - -err_undo: - /* - * We really don't want to leave around a bunch of split blocks, since - * bigger is better, so make sure we merge everything back before we - * free the allocated blocks. - */ - buddy = __get_buddy(block); - if (buddy && - (drm_buddy_block_is_free(block) && - drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block, false); - -err_free: - if (err == -ENOSPC && total_allocated_on_err) { - list_splice_tail(&allocated, blocks); - *total_allocated_on_err = total_allocated; - } else { - drm_buddy_free_list_internal(mm, &allocated); - } - - return err; -} - -static int __drm_buddy_alloc_range(struct drm_buddy *mm, - u64 start, - u64 size, - u64 *total_allocated_on_err, - struct list_head *blocks) -{ - LIST_HEAD(dfs); - int i; - - for (i = 0; i < mm->n_roots; ++i) - list_add_tail(&mm->roots[i]->tmp_link, &dfs); - - return __alloc_range(mm, &dfs, start, size, - blocks, total_allocated_on_err); -} - -static int __alloc_contig_try_harder(struct drm_buddy *mm, - u64 size, - u64 min_block_size, - struct list_head *blocks) -{ - u64 rhs_offset, lhs_offset, lhs_size, filled; - struct drm_buddy_block *block; - unsigned int tree, order; - LIST_HEAD(blocks_lhs); - unsigned long pages; - u64 modify_size; - int err; - - modify_size = rounddown_pow_of_two(size); - pages = 
modify_size >> ilog2(mm->chunk_size); - order = fls(pages) - 1; - if (order == 0) - return -ENOSPC; - - for_each_free_tree(tree) { - struct rb_root *root; - struct rb_node *iter; - - root = &mm->free_trees[tree][order]; - if (rbtree_is_empty(root)) - continue; - - iter = rb_last(root); - while (iter) { - block = rbtree_get_free_block(iter); - - /* Allocate blocks traversing RHS */ - rhs_offset = drm_buddy_block_offset(block); - err = __drm_buddy_alloc_range(mm, rhs_offset, size, - &filled, blocks); - if (!err || err != -ENOSPC) - return err; - - lhs_size = max((size - filled), min_block_size); - if (!IS_ALIGNED(lhs_size, min_block_size)) - lhs_size = round_up(lhs_size, min_block_size); - - /* Allocate blocks traversing LHS */ - lhs_offset = drm_buddy_block_offset(block) - lhs_size; - err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size, - NULL, &blocks_lhs); - if (!err) { - list_splice(&blocks_lhs, blocks); - return 0; - } else if (err != -ENOSPC) { - drm_buddy_free_list_internal(mm, blocks); - return err; - } - /* Free blocks for the next iteration */ - drm_buddy_free_list_internal(mm, blocks); - - iter = rb_prev(iter); - } - } - - return -ENOSPC; -} - -/** - * drm_buddy_block_trim - free unused pages - * - * @mm: DRM buddy manager - * @start: start address to begin the trimming. - * @new_size: original size requested - * @blocks: Input and output list of allocated blocks. - * MUST contain single block as input to be trimmed. - * On success will contain the newly allocated blocks - * making up the @new_size. Blocks always appear in - * ascending order - * - * For contiguous allocation, we round up the size to the nearest - * power of two value, drivers consume *actual* size, so remaining - * portions are unused and can be optionally freed with this function - * - * Returns: - * 0 on success, error code on failure. 
- */ -int drm_buddy_block_trim(struct drm_buddy *mm, - u64 *start, - u64 new_size, - struct list_head *blocks) -{ - struct drm_buddy_block *parent; - struct drm_buddy_block *block; - u64 block_start, block_end; - LIST_HEAD(dfs); - u64 new_start; - int err; - - if (!list_is_singular(blocks)) - return -EINVAL; - - block = list_first_entry(blocks, - struct drm_buddy_block, - link); - - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block); - - if (WARN_ON(!drm_buddy_block_is_allocated(block))) - return -EINVAL; - - if (new_size > drm_buddy_block_size(mm, block)) - return -EINVAL; - - if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) - return -EINVAL; - - if (new_size == drm_buddy_block_size(mm, block)) - return 0; - - new_start = block_start; - if (start) { - new_start = *start; - - if (new_start < block_start) - return -EINVAL; - - if (!IS_ALIGNED(new_start, mm->chunk_size)) - return -EINVAL; - - if (range_overflows(new_start, new_size, block_end)) - return -EINVAL; - } - - list_del(&block->link); - mark_free(mm, block); - mm->avail += drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail += drm_buddy_block_size(mm, block); - - /* Prevent recursively freeing this node */ - parent = block->parent; - block->parent = NULL; - - list_add(&block->tmp_link, &dfs); - err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); - if (err) { - mark_allocated(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); - list_add(&block->link, blocks); - } - - block->parent = parent; - return err; -} -EXPORT_SYMBOL(drm_buddy_block_trim); - -static struct drm_buddy_block * -__drm_buddy_alloc_blocks(struct drm_buddy *mm, - u64 start, u64 end, - unsigned int order, - unsigned long flags) -{ - if (flags & DRM_BUDDY_RANGE_ALLOCATION) - /* Allocate traversing within the range */ - return 
__drm_buddy_alloc_range_bias(mm, start, end, - order, flags); - else - /* Allocate from freetree */ - return alloc_from_freetree(mm, order, flags); -} - -/** - * drm_buddy_alloc_blocks - allocate power-of-two blocks - * - * @mm: DRM buddy manager to allocate from - * @start: start of the allowed range for this block - * @end: end of the allowed range for this block - * @size: size of the allocation in bytes - * @min_block_size: alignment of the allocation - * @blocks: output list head to add allocated blocks - * @flags: DRM_BUDDY_*_ALLOCATION flags - * - * alloc_range_bias() called on range limitations, which traverses - * the tree and returns the desired block. - * - * alloc_from_freetree() called when *no* range restrictions - * are enforced, which picks the block from the freetree. - * - * Returns: - * 0 on success, error code on failure. - */ -int drm_buddy_alloc_blocks(struct drm_buddy *mm, - u64 start, u64 end, u64 size, - u64 min_block_size, - struct list_head *blocks, - unsigned long flags) -{ - struct drm_buddy_block *block = NULL; - u64 original_size, original_min_size; - unsigned int min_order, order; - LIST_HEAD(allocated); - unsigned long pages; - int err; - - if (size < mm->chunk_size) - return -EINVAL; - - if (min_block_size < mm->chunk_size) - return -EINVAL; - - if (!is_power_of_2(min_block_size)) - return -EINVAL; - - if (!IS_ALIGNED(start | end | size, mm->chunk_size)) - return -EINVAL; - - if (end > mm->size) - return -EINVAL; - - if (range_overflows(start, size, mm->size)) - return -EINVAL; - - /* Actual range allocation */ - if (start + size == end) { - if (!IS_ALIGNED(start | end, min_block_size)) - return -EINVAL; - - return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); - } - - original_size = size; - original_min_size = min_block_size; - - /* Roundup the size to power of 2 */ - if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* Align size value to min_block_size */ - } 
else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); - } - - pages = size >> ilog2(mm->chunk_size); - order = fls(pages) - 1; - min_order = ilog2(min_block_size) - ilog2(mm->chunk_size); - - if (order > mm->max_order || size > mm->size) { - if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) && - !(flags & DRM_BUDDY_RANGE_ALLOCATION)) - return __alloc_contig_try_harder(mm, original_size, - original_min_size, blocks); - - return -EINVAL; - } - - do { - order = min(order, (unsigned int)fls(pages) - 1); - BUG_ON(order > mm->max_order); - BUG_ON(order < min_order); - - do { - block = __drm_buddy_alloc_blocks(mm, start, - end, - order, - flags); - if (!IS_ERR(block)) - break; - - if (order-- == min_order) { - /* Try allocation through force merge method */ - if (mm->clear_avail && - !__force_merge(mm, start, end, min_order)) { - block = __drm_buddy_alloc_blocks(mm, start, - end, - min_order, - flags); - if (!IS_ERR(block)) { - order = min_order; - break; - } - } - - /* - * Try contiguous block allocation through - * try harder method. 
- */ - if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION && - !(flags & DRM_BUDDY_RANGE_ALLOCATION)) - return __alloc_contig_try_harder(mm, - original_size, - original_min_size, - blocks); - err = -ENOSPC; - goto err_free; - } - } while (1); - - mark_allocated(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); - kmemleak_update_trace(block); - list_add_tail(&block->link, &allocated); - - pages -= BIT(order); - - if (!pages) - break; - } while (1); - - /* Trim the allocated block to the required size */ - if (!(flags & DRM_BUDDY_TRIM_DISABLE) && - original_size != size) { - struct list_head *trim_list; - LIST_HEAD(temp); - u64 trim_size; - - trim_list = &allocated; - trim_size = original_size; - - if (!list_is_singular(&allocated)) { - block = list_last_entry(&allocated, typeof(*block), link); - list_move(&block->link, &temp); - trim_list = &temp; - trim_size = drm_buddy_block_size(mm, block) - - (size - original_size); - } - - drm_buddy_block_trim(mm, - NULL, - trim_size, - trim_list); - - if (!list_empty(&temp)) - list_splice_tail(trim_list, &allocated); - } - - list_splice_tail(&allocated, blocks); - return 0; - -err_free: - drm_buddy_free_list_internal(mm, &allocated); - return err; -} -EXPORT_SYMBOL(drm_buddy_alloc_blocks); - -/** - * drm_buddy_block_print - print block information - * - * @mm: DRM buddy manager - * @block: DRM buddy block - * @p: DRM printer to use - */ -void drm_buddy_block_print(struct drm_buddy *mm, - struct drm_buddy_block *block, - struct drm_printer *p) -{ - u64 start = drm_buddy_block_offset(block); - u64 size = drm_buddy_block_size(mm, block); - - drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); -} -EXPORT_SYMBOL(drm_buddy_block_print); - -/** - * drm_buddy_print - print allocator state - * - * @mm: DRM buddy manager - * @p: DRM printer to use - */ -void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p) -{ - 
int order; - - drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", - mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); - - for (order = mm->max_order; order >= 0; order--) { - struct drm_buddy_block *block, *tmp; - struct rb_root *root; - u64 count = 0, free; - unsigned int tree; - - for_each_free_tree(tree) { - root = &mm->free_trees[tree][order]; - - rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { - BUG_ON(!drm_buddy_block_is_free(block)); - count++; - } - } - - drm_printf(p, "order-%2d ", order); - - free = count * (mm->chunk_size << order); - if (free < SZ_1M) - drm_printf(p, "free: %8llu KiB", free >> 10); - else - drm_printf(p, "free: %8llu MiB", free >> 20); - - drm_printf(p, ", blocks: %llu\n", count); - } -} -EXPORT_SYMBOL(drm_buddy_print); - -static void drm_buddy_module_exit(void) -{ - kmem_cache_destroy(slab_blocks); -} - -static int __init drm_buddy_module_init(void) -{ - slab_blocks = KMEM_CACHE(drm_buddy_block, 0); - if (!slab_blocks) - return -ENOMEM; - - return 0; -} - -module_init(drm_buddy_module_init); -module_exit(drm_buddy_module_exit); - -MODULE_DESCRIPTION("DRM Buddy Allocator"); -MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index f65fe86c02b5..eeda5daa544f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -5,7 +5,7 @@ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 4d830740946d..30246f02bcfe 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -7,7 +7,7 @@ #include "i915_scatterlist.h" #include "i915_ttm_buddy_manager.h" -#include +#include #include #include diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 
d5c6e6605086..6b256d95badd 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -5,7 +5,7 @@ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/lib/drm_random.c b/drivers/gpu/drm/lib/drm_random.c deleted file mode 100644 index 0e9dba1ef4af..000000000000 --- a/drivers/gpu/drm/lib/drm_random.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include - -#include "drm_random.h" - -u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) -{ - return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); -} -EXPORT_SYMBOL(drm_prandom_u32_max_state); - -void drm_random_reorder(unsigned int *order, unsigned int count, - struct rnd_state *state) -{ - unsigned int i, j; - - for (i = 0; i < count; ++i) { - BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = drm_prandom_u32_max_state(count, state); - swap(order[i], order[j]); - } -} -EXPORT_SYMBOL(drm_random_reorder); - -unsigned int *drm_random_order(unsigned int count, struct rnd_state *state) -{ - unsigned int *order, i; - - order = kmalloc_array(count, sizeof(*order), GFP_KERNEL); - if (!order) - return order; - - for (i = 0; i < count; i++) - order[i] = i; - - drm_random_reorder(order, count, state); - return order; -} -EXPORT_SYMBOL(drm_random_order); diff --git a/drivers/gpu/drm/lib/drm_random.h b/drivers/gpu/drm/lib/drm_random.h deleted file mode 100644 index 9f827260a89d..000000000000 --- a/drivers/gpu/drm/lib/drm_random.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DRM_RANDOM_H__ -#define __DRM_RANDOM_H__ - -/* This is a temporary home for a couple of utility functions that should - * be transposed to lib/ at the earliest convenience. 
- */ - -#include - -#define DRM_RND_STATE_INITIALIZER(seed__) ({ \ - struct rnd_state state__; \ - prandom_seed_state(&state__, (seed__)); \ - state__; \ -}) - -#define DRM_RND_STATE(name__, seed__) \ - struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__) - -unsigned int *drm_random_order(unsigned int count, - struct rnd_state *state); -void drm_random_reorder(unsigned int *order, - unsigned int count, - struct rnd_state *state); -u32 drm_prandom_u32_max_state(u32 ep_ro, - struct rnd_state *state); - -#endif /* !__DRM_RANDOM_H__ */ diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile index 87d5d5f9332a..d2e2e3d8349a 100644 --- a/drivers/gpu/drm/tests/Makefile +++ b/drivers/gpu/drm/tests/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \ drm_atomic_test.o \ drm_atomic_state_test.o \ drm_bridge_test.o \ - drm_buddy_test.o \ drm_cmdline_parser_test.o \ drm_connector_test.o \ drm_damage_helper_test.o \ diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c deleted file mode 100644 index e6f8459c6c54..000000000000 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ /dev/null @@ -1,928 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - * Copyright © 2022 Maíra Canal - */ - -#include - -#include -#include -#include - -#include - -#include "../lib/drm_random.h" - -static unsigned int random_seed; - -static inline u64 get_size(int order, u64 chunk_size) -{ - return (1 << order) * chunk_size; -} - -static void drm_test_buddy_fragmentation_performance(struct kunit *test) -{ - struct drm_buddy_block *block, *tmp; - int num_blocks, i, ret, count = 0; - LIST_HEAD(allocated_blocks); - unsigned long elapsed_ms; - LIST_HEAD(reverse_list); - LIST_HEAD(test_blocks); - LIST_HEAD(clear_list); - LIST_HEAD(dirty_list); - LIST_HEAD(free_list); - struct drm_buddy mm; - u64 mm_size = SZ_4G; - ktime_t start, end; - - /* - * Allocation under severe fragmentation - * - * Create severe 
fragmentation by allocating the entire 4 GiB address space - * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern - * leaves many scattered holes. Split the allocations into two groups and - * return them with different flags to block coalescing, then repeatedly - * allocate and free 64 KiB blocks while timing the loop. This stresses how - * quickly the allocator can satisfy larger, aligned requests from a pool of - * highly fragmented space. - */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), - "buddy_init failed\n"); - - num_blocks = mm_size / SZ_64K; - - start = ktime_get(); - /* Allocate with maximum fragmentation - 8K blocks with 64K alignment */ - for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, - &allocated_blocks, 0), - "buddy_alloc hit an error size=%u\n", SZ_8K); - - list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { - if (count % 4 == 0 || count % 4 == 3) - list_move_tail(&block->link, &clear_list); - else - list_move_tail(&block->link, &dirty_list); - count++; - } - - /* Free with different flags to ensure no coalescing */ - drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty_list, 0); - - for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, - &test_blocks, 0), - "buddy_alloc hit an error size=%u\n", SZ_64K); - drm_buddy_free_list(&mm, &test_blocks, 0); - - end = ktime_get(); - elapsed_ms = ktime_to_ms(ktime_sub(end, start)); - - kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms); - - drm_buddy_fini(&mm); - - /* - * Reverse free order under fragmentation - * - * Construct a fragmented 4 GiB space by allocating every 8 KiB block with - * 64 KiB alignment, creating a dense scatter of small regions. 
Half of the - * blocks are selectively freed to form sparse gaps, while the remaining - * allocations are preserved, reordered in reverse, and released back with - * the cleared flag. This models a pathological reverse-ordered free pattern - * and measures how quickly the allocator can merge and reclaim space when - * deallocation occurs in the opposite order of allocation, exposing the - * cost difference between a linear freelist scan and an ordered tree lookup. - */ - ret = drm_buddy_init(&mm, mm_size, SZ_4K); - KUNIT_ASSERT_EQ(test, ret, 0); - - start = ktime_get(); - /* Allocate maximum fragmentation */ - for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, - &allocated_blocks, 0), - "buddy_alloc hit an error size=%u\n", SZ_8K); - - list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { - if (count % 2 == 0) - list_move_tail(&block->link, &free_list); - count++; - } - drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED); - - list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) - list_move(&block->link, &reverse_list); - drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED); - - end = ktime_get(); - elapsed_ms = ktime_to_ms(ktime_sub(end, start)); - - kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); - - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_range_bias(struct kunit *test) -{ - u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; - DRM_RND_STATE(prng, random_seed); - unsigned int i, count, *order; - struct drm_buddy_block *block; - unsigned long flags; - struct drm_buddy mm; - LIST_HEAD(allocated); - - bias_size = SZ_1M; - ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); - ps = max(SZ_4K, ps); - mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ - - kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), - "buddy_init failed\n"); - - 
count = mm_size / bias_size; - order = drm_random_order(count, &prng); - KUNIT_EXPECT_TRUE(test, order); - - /* - * Idea is to split the address space into uniform bias ranges, and then - * in some random order allocate within each bias, using various - * patterns within. This should detect if allocations leak out from a - * given bias, for example. - */ - - for (i = 0; i < count; i++) { - LIST_HEAD(tmp); - u32 size; - - bias_start = order[i] * bias_size; - bias_end = bias_start + bias_size; - bias_rem = bias_size; - - /* internal round_up too big */ - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, bias_size + ps, bias_size, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, bias_size, bias_size); - - /* size too big */ - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, bias_size + ps, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, bias_size + ps, ps); - - /* bias range too small for size */ - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start + ps, - bias_end, bias_size, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", - bias_start + ps, bias_end, bias_size, ps); - - /* bias misaligned */ - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start + ps, - bias_end - ps, - bias_size >> 1, bias_size >> 1, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", - bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); - - /* single big page */ - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, bias_size, bias_size, - &tmp, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, bias_size, 
bias_size); - drm_buddy_free_list(&mm, &tmp, 0); - - /* single page with internal round_up */ - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, ps, bias_size, - &tmp, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, ps, bias_size); - drm_buddy_free_list(&mm, &tmp, 0); - - /* random size within */ - size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); - if (size) - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, size, ps, - &tmp, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, size, ps); - - bias_rem -= size; - /* too big for current avail */ - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, bias_rem + ps, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, bias_rem + ps, ps); - - if (bias_rem) { - /* random fill of the remainder */ - size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); - size = max(size, ps); - - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, size, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, size, ps); - /* - * Intentionally allow some space to be left - * unallocated, and ideally not always on the bias - * boundaries. - */ - drm_buddy_free_list(&mm, &tmp, 0); - } else { - list_splice_tail(&tmp, &allocated); - } - } - - kfree(order); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); - - /* - * Something more free-form. Idea is to pick a random starting bias - * range within the address space and then start filling it up. Also - * randomly grow the bias range in both directions as we go along. 
This - * should give us bias start/end which is not always uniform like above, - * and in some cases will require the allocator to jump over already - * allocated nodes in the middle of the address space. - */ - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), - "buddy_init failed\n"); - - bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); - bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); - bias_end = max(bias_end, bias_start + ps); - bias_rem = bias_end - bias_start; - - do { - u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); - - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, size, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, size, ps); - bias_rem -= size; - - /* - * Try to randomly grow the bias range in both directions, or - * only one, or perhaps don't grow at all. - */ - do { - u32 old_bias_start = bias_start; - u32 old_bias_end = bias_end; - - if (bias_start) - bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); - if (bias_end != mm_size) - bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); - - bias_rem += old_bias_start - bias_start; - bias_rem += bias_end - old_bias_end; - } while (!bias_rem && (bias_start || bias_end != mm_size)); - } while (bias_rem); - - KUNIT_ASSERT_EQ(test, bias_start, 0); - KUNIT_ASSERT_EQ(test, bias_end, mm_size); - KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, bias_end, - ps, ps, - &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc passed with bias(%x-%x), size=%u\n", - bias_start, bias_end, ps); - - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); - - /* - * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is - * zero. 
This will validate the bias range allocation in scenarios like system boot - * when no cleared blocks are available and exercise the fallback path too. The resulting - * blocks should always be dirty. - */ - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), - "buddy_init failed\n"); - - bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); - bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); - bias_end = max(bias_end, bias_start + ps); - bias_rem = bias_end - bias_start; - - flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION; - size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); - - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, - bias_end, size, ps, - &allocated, - flags), - "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", - bias_start, bias_end, size, ps); - - list_for_each_entry(block, &allocated, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); - - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_clear(struct kunit *test) -{ - unsigned long n_pages, total, i = 0; - const unsigned long ps = SZ_4K; - struct drm_buddy_block *block; - const int max_order = 12; - LIST_HEAD(allocated); - struct drm_buddy mm; - unsigned int order; - u32 mm_size, size; - LIST_HEAD(dirty); - LIST_HEAD(clean); - - mm_size = SZ_4K << max_order; - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); - - KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - - /* - * Idea is to allocate and free some random portion of the address space, - * returning those pages as non-dirty and randomly alternate between - * requesting dirty and non-dirty pages (not going over the limit - * we freed as non-dirty), putting that into two separate lists. - * Loop over both lists at the end checking that the dirty list - * is indeed all dirty pages and vice versa. 
Free it all again, - * keeping the dirty/clear status. - */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 5 * ps, ps, &allocated, - DRM_BUDDY_TOPDOWN_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 5 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - - n_pages = 10; - do { - unsigned long flags; - struct list_head *list; - int slot = i % 2; - - if (slot == 0) { - list = &dirty; - flags = 0; - } else { - list = &clean; - flags = DRM_BUDDY_CLEAR_ALLOCATION; - } - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - ps, ps, list, - flags), - "buddy_alloc hit an error size=%lu\n", ps); - } while (++i < n_pages); - - list_for_each_entry(block, &clean, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true); - - list_for_each_entry(block, &dirty, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); - - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); - - /* - * Trying to go over the clear limit for some allocation. - * The allocation should never fail with reasonable page-size. - */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 10 * ps, ps, &clean, - DRM_BUDDY_CLEAR_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 10 * ps); - - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty, 0); - drm_buddy_fini(&mm); - - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); - - /* - * Create a new mm. Intentionally fragment the address space by creating - * two alternating lists. Free both lists, one as dirty the other as clean. - * Try to allocate double the previous size with matching min_page_size. The - * allocation should never fail as it calls the force_merge. Also check that - * the page is always dirty after force_merge. Free the page as dirty, then - * repeat the whole thing, increment the order until we hit the max_order. 
- */ - - i = 0; - n_pages = mm_size / ps; - do { - struct list_head *list; - int slot = i % 2; - - if (slot == 0) - list = &dirty; - else - list = &clean; - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - ps, ps, list, 0), - "buddy_alloc hit an error size=%lu\n", ps); - } while (++i < n_pages); - - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty, 0); - - order = 1; - do { - size = SZ_4K << order; - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - size, size, &allocated, - DRM_BUDDY_CLEAR_ALLOCATION), - "buddy_alloc hit an error size=%u\n", size); - total = 0; - list_for_each_entry(block, &allocated, link) { - if (size != mm_size) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); - total += drm_buddy_block_size(&mm, block); - } - KUNIT_EXPECT_EQ(test, total, size); - - drm_buddy_free_list(&mm, &allocated, 0); - } while (++order <= max_order); - - drm_buddy_fini(&mm); - - /* - * Create a new mm with a non power-of-two size. Allocate a random size from each - * root, free as cleared and then call fini. This will ensure the multi-root - * force merge during fini. 
- */ - mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2)); - - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); - KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, - 4 * ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 4 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, - 2 * ps, ps, &allocated, - DRM_BUDDY_CLEAR_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size, - ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_contiguous(struct kunit *test) -{ - const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; - unsigned long i, n_pages, total; - struct drm_buddy_block *block; - struct drm_buddy mm; - LIST_HEAD(left); - LIST_HEAD(middle); - LIST_HEAD(right); - LIST_HEAD(allocated); - - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); - - /* - * Idea is to fragment the address space by alternating block - * allocations between three different lists; one for left, middle and - * right. We can then free a list to simulate fragmentation. In - * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, - * including the try_harder path. 
- */ - - i = 0; - n_pages = mm_size / ps; - do { - struct list_head *list; - int slot = i % 3; - - if (slot == 0) - list = &left; - else if (slot == 1) - list = &middle; - else - list = &right; - KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, 0, mm_size, - ps, ps, list, 0), - "buddy_alloc hit an error size=%lu\n", - ps); - } while (++i < n_pages); - - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%lu\n", 3 * ps); - - drm_buddy_free_list(&mm, &middle, 0); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%lu\n", 3 * ps); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 2 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%lu\n", 2 * ps); - - drm_buddy_free_list(&mm, &right, 0); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%lu\n", 3 * ps); - /* - * At this point we should have enough contiguous space for 2 blocks, - * however they are never buddies (since we freed middle and right) so - * will require the try_harder logic to find them. 
- */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 2 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 2 * ps); - - drm_buddy_free_list(&mm, &left, 0); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, - 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%lu\n", 3 * ps); - - total = 0; - list_for_each_entry(block, &allocated, link) - total += drm_buddy_block_size(&mm, block); - - KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); - - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_pathological(struct kunit *test) -{ - u64 mm_size, size, start = 0; - struct drm_buddy_block *block; - const int max_order = 3; - unsigned long flags = 0; - int order, top; - struct drm_buddy mm; - LIST_HEAD(blocks); - LIST_HEAD(holes); - LIST_HEAD(tmp); - - /* - * Create a pot-sized mm, then allocate one of each possible - * order within. This should leave the mm with exactly one - * page left. Free the largest block, then whittle down again. - * Eventually we will have a fully 50% fragmented mm. 
- */ - - mm_size = SZ_4K << max_order; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), - "buddy_init failed\n"); - - KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - - for (top = max_order; top; top--) { - /* Make room by freeing the largest allocated block */ - block = list_first_entry_or_null(&blocks, typeof(*block), link); - if (block) { - list_del(&block->link); - drm_buddy_free_block(&mm, block); - } - - for (order = top; order--;) { - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, - mm_size, size, size, - &tmp, flags), - "buddy_alloc hit -ENOMEM with order=%d, top=%d\n", - order, top); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &blocks); - } - - /* There should be one final page for this sub-allocation */ - size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc hit -ENOMEM for hole\n"); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &holes); - - size = get_size(top, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", - top, max_order); - } - - drm_buddy_free_list(&mm, &holes, 0); - - /* Nothing larger than blocks of chunk_size now available */ - for (order = 1; order <= max_order; order++) { - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc unexpectedly succeeded at order %d, it should be full!", - order); - } - - list_splice_tail(&holes, &blocks); - drm_buddy_free_list(&mm, 
&blocks, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_pessimistic(struct kunit *test) -{ - u64 mm_size, size, start = 0; - struct drm_buddy_block *block, *bn; - const unsigned int max_order = 16; - unsigned long flags = 0; - struct drm_buddy mm; - unsigned int order; - LIST_HEAD(blocks); - LIST_HEAD(tmp); - - /* - * Create a pot-sized mm, then allocate one of each possible - * order within. This should leave the mm with exactly one - * page left. - */ - - mm_size = SZ_4K << max_order; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), - "buddy_init failed\n"); - - KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - - for (order = 0; order < max_order; order++) { - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc hit -ENOMEM with order=%d\n", - order); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &blocks); - } - - /* And now the last remaining block available */ - size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc hit -ENOMEM on final alloc\n"); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &blocks); - - /* Should be completely full! 
*/ - for (order = max_order; order--;) { - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc unexpectedly succeeded, it should be full!"); - } - - block = list_last_entry(&blocks, typeof(*block), link); - list_del(&block->link); - drm_buddy_free_block(&mm, block); - - /* As we free in increasing size, we make available larger blocks */ - order = 1; - list_for_each_entry_safe(block, bn, &blocks, link) { - list_del(&block->link); - drm_buddy_free_block(&mm, block); - - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc hit -ENOMEM with order=%d\n", - order); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_del(&block->link); - drm_buddy_free_block(&mm, block); - order++; - } - - /* To confirm, now the whole mm should be available */ - size = get_size(max_order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc (realloc) hit -ENOMEM with order=%d\n", - max_order); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_del(&block->link); - drm_buddy_free_block(&mm, block); - drm_buddy_free_list(&mm, &blocks, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_optimistic(struct kunit *test) -{ - u64 mm_size, size, start = 0; - struct drm_buddy_block *block; - unsigned long flags = 0; - const int max_order = 16; - struct drm_buddy mm; - LIST_HEAD(blocks); - LIST_HEAD(tmp); - int order; - - /* - * Create a mm with one block of each order available, and - * try to allocate them all. 
- */ - - mm_size = SZ_4K * ((1 << (max_order + 1)) - 1); - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), - "buddy_init failed\n"); - - KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - - for (order = 0; order <= max_order; order++) { - size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc hit -ENOMEM with order=%d\n", - order); - - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); - KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); - - list_move_tail(&block->link, &blocks); - } - - /* Should be completely full! */ - size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, - size, size, &tmp, flags), - "buddy_alloc unexpectedly succeeded, it should be full!"); - - drm_buddy_free_list(&mm, &blocks, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_limit(struct kunit *test) -{ - u64 size = U64_MAX, start = 0; - struct drm_buddy_block *block; - unsigned long flags = 0; - LIST_HEAD(allocated); - struct drm_buddy mm; - - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K)); - - KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER, - "mm.max_order(%d) != %d\n", mm.max_order, - DRM_BUDDY_MAX_ORDER); - - size = mm.chunk_size << mm.max_order; - KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size, - mm.chunk_size, &allocated, flags)); - - block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link); - KUNIT_EXPECT_TRUE(test, block); - - KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order, - "block order(%d) != %d\n", - drm_buddy_block_order(block), mm.max_order); - - KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block), - BIT_ULL(mm.max_order) * mm.chunk_size, - "block size(%llu) != %llu\n", - drm_buddy_block_size(&mm, block), - BIT_ULL(mm.max_order) * mm.chunk_size); - - 
drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); -} - -static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test) -{ - u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G; - struct drm_buddy mm; - LIST_HEAD(blocks); - int err; - - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), - "buddy_init failed\n"); - - /* CONTIGUOUS allocation should succeed via try_harder fallback */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size, - SZ_4K, &blocks, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%llu\n", size); - drm_buddy_free_list(&mm, &blocks, 0); - - /* Non-CONTIGUOUS with large min_block_size should return -EINVAL */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0); - KUNIT_EXPECT_EQ(test, err, -EINVAL); - - /* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, - DRM_BUDDY_RANGE_ALLOCATION); - KUNIT_EXPECT_EQ(test, err, -EINVAL); - - /* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, - DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION); - KUNIT_EXPECT_EQ(test, err, -EINVAL); - - drm_buddy_fini(&mm); -} - -static int drm_buddy_suite_init(struct kunit_suite *suite) -{ - while (!random_seed) - random_seed = get_random_u32(); - - kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", - random_seed); - - return 0; -} - -static struct kunit_case drm_buddy_tests[] = { - KUNIT_CASE(drm_test_buddy_alloc_limit), - KUNIT_CASE(drm_test_buddy_alloc_optimistic), - KUNIT_CASE(drm_test_buddy_alloc_pessimistic), - KUNIT_CASE(drm_test_buddy_alloc_pathological), - KUNIT_CASE(drm_test_buddy_alloc_contiguous), - KUNIT_CASE(drm_test_buddy_alloc_clear), - KUNIT_CASE(drm_test_buddy_alloc_range_bias), - 
KUNIT_CASE(drm_test_buddy_fragmentation_performance), - KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order), - {} -}; - -static struct kunit_suite drm_buddy_test_suite = { - .name = "drm_buddy", - .suite_init = drm_buddy_suite_init, - .test_cases = drm_buddy_tests, -}; - -kunit_test_suite(drm_buddy_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Kunit test for drm_buddy functions"); -MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index 3a20c788c51f..2fc47f3b463b 100644 --- a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -16,8 +16,6 @@ #include #include -#include "../lib/drm_random.h" - struct drm_exec_priv { struct device *dev; struct drm_device *drm; diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index aec9eccdeae9..e24a619059d8 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -16,8 +16,6 @@ #include #include -#include "../lib/drm_random.h" - enum { BEST, BOTTOMUP, diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h index e4c95f86a467..96ea8c9aae34 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h +++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h @@ -5,7 +5,7 @@ #ifndef TTM_MOCK_MANAGER_H #define TTM_MOCK_MANAGER_H -#include +#include struct ttm_mock_manager { struct ttm_resource_manager man; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index a71e14818ec2..babeec5511d9 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -6,7 +6,7 @@ #ifndef _XE_TTM_VRAM_MGR_TYPES_H_ #define _XE_TTM_VRAM_MGR_TYPES_H_ -#include +#include #include /** diff --git a/drivers/gpu/tests/Makefile b/drivers/gpu/tests/Makefile new file mode 100644 index 000000000000..8e7654e87d82 --- /dev/null +++ 
b/drivers/gpu/tests/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +gpu_buddy_tests-y = gpu_buddy_test.o gpu_random.o +obj-$(CONFIG_DRM_KUNIT_TEST) += gpu_buddy_tests.o diff --git a/drivers/gpu/tests/gpu_buddy_test.c b/drivers/gpu/tests/gpu_buddy_test.c new file mode 100644 index 000000000000..b905932da990 --- /dev/null +++ b/drivers/gpu/tests/gpu_buddy_test.c @@ -0,0 +1,928 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + * Copyright © 2022 Maíra Canal + */ + +#include + +#include +#include +#include + +#include + +#include "gpu_random.h" + +static unsigned int random_seed; + +static inline u64 get_size(int order, u64 chunk_size) +{ + return (1 << order) * chunk_size; +} + +static void drm_test_buddy_fragmentation_performance(struct kunit *test) +{ + struct drm_buddy_block *block, *tmp; + int num_blocks, i, ret, count = 0; + LIST_HEAD(allocated_blocks); + unsigned long elapsed_ms; + LIST_HEAD(reverse_list); + LIST_HEAD(test_blocks); + LIST_HEAD(clear_list); + LIST_HEAD(dirty_list); + LIST_HEAD(free_list); + struct drm_buddy mm; + u64 mm_size = SZ_4G; + ktime_t start, end; + + /* + * Allocation under severe fragmentation + * + * Create severe fragmentation by allocating the entire 4 GiB address space + * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern + * leaves many scattered holes. Split the allocations into two groups and + * return them with different flags to block coalescing, then repeatedly + * allocate and free 64 KiB blocks while timing the loop. This stresses how + * quickly the allocator can satisfy larger, aligned requests from a pool of + * highly fragmented space. 
+ */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + "buddy_init failed\n"); + + num_blocks = mm_size / SZ_64K; + + start = ktime_get(); + /* Allocate with maximum fragmentation - 8K blocks with 64K alignment */ + for (i = 0; i < num_blocks; i++) + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, + &allocated_blocks, 0), + "buddy_alloc hit an error size=%u\n", SZ_8K); + + list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { + if (count % 4 == 0 || count % 4 == 3) + list_move_tail(&block->link, &clear_list); + else + list_move_tail(&block->link, &dirty_list); + count++; + } + + /* Free with different flags to ensure no coalescing */ + drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED); + drm_buddy_free_list(&mm, &dirty_list, 0); + + for (i = 0; i < num_blocks; i++) + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, + &test_blocks, 0), + "buddy_alloc hit an error size=%u\n", SZ_64K); + drm_buddy_free_list(&mm, &test_blocks, 0); + + end = ktime_get(); + elapsed_ms = ktime_to_ms(ktime_sub(end, start)); + + kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms); + + drm_buddy_fini(&mm); + + /* + * Reverse free order under fragmentation + * + * Construct a fragmented 4 GiB space by allocating every 8 KiB block with + * 64 KiB alignment, creating a dense scatter of small regions. Half of the + * blocks are selectively freed to form sparse gaps, while the remaining + * allocations are preserved, reordered in reverse, and released back with + * the cleared flag. This models a pathological reverse-ordered free pattern + * and measures how quickly the allocator can merge and reclaim space when + * deallocation occurs in the opposite order of allocation, exposing the + * cost difference between a linear freelist scan and an ordered tree lookup. 
+ */ + ret = drm_buddy_init(&mm, mm_size, SZ_4K); + KUNIT_ASSERT_EQ(test, ret, 0); + + start = ktime_get(); + /* Allocate maximum fragmentation */ + for (i = 0; i < num_blocks; i++) + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, + &allocated_blocks, 0), + "buddy_alloc hit an error size=%u\n", SZ_8K); + + list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { + if (count % 2 == 0) + list_move_tail(&block->link, &free_list); + count++; + } + drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED); + + list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) + list_move(&block->link, &reverse_list); + drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED); + + end = ktime_get(); + elapsed_ms = ktime_to_ms(ktime_sub(end, start)); + + kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); + + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy_block *block; + unsigned long flags; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. 
+ */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp, 0); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + 
drm_buddy_free_list(&mm, &tmp, 0); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp, 0); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated, 0); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. 
+ */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated, 0); + drm_buddy_fini(&mm); + + /* + * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is + * zero. This will validate the bias range allocation in scenarios like system boot + * when no cleared blocks are available and exercise the fallback path too. The resulting + * blocks should always be dirty. 
+ */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION; + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + flags), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + list_for_each_entry(block, &allocated, link) + KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); + + drm_buddy_free_list(&mm, &allocated, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_clear(struct kunit *test) +{ + unsigned long n_pages, total, i = 0; + const unsigned long ps = SZ_4K; + struct drm_buddy_block *block; + const int max_order = 12; + LIST_HEAD(allocated); + struct drm_buddy mm; + unsigned int order; + u32 mm_size, size; + LIST_HEAD(dirty); + LIST_HEAD(clean); + + mm_size = SZ_4K << max_order; + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + KUNIT_EXPECT_EQ(test, mm.max_order, max_order); + + /* + * Idea is to allocate and free some random portion of the address space, + * returning those pages as non-dirty and randomly alternate between + * requesting dirty and non-dirty pages (not going over the limit + * we freed as non-dirty), putting that into two separate lists. + * Loop over both lists at the end checking that the dirty list + * is indeed all dirty pages and vice versa. Free it all again, + * keeping the dirty/clear status. 
+ */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 5 * ps, ps, &allocated, + DRM_BUDDY_TOPDOWN_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 5 * ps); + drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); + + n_pages = 10; + do { + unsigned long flags; + struct list_head *list; + int slot = i % 2; + + if (slot == 0) { + list = &dirty; + flags = 0; + } else { + list = &clean; + flags = DRM_BUDDY_CLEAR_ALLOCATION; + } + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, + flags), + "buddy_alloc hit an error size=%lu\n", ps); + } while (++i < n_pages); + + list_for_each_entry(block, &clean, link) + KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true); + + list_for_each_entry(block, &dirty, link) + KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); + + drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); + + /* + * Trying to go over the clear limit for some allocation. + * The allocation should never fail with reasonable page-size. + */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 10 * ps, ps, &clean, + DRM_BUDDY_CLEAR_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 10 * ps); + + drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); + drm_buddy_free_list(&mm, &dirty, 0); + drm_buddy_fini(&mm); + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + /* + * Create a new mm. Intentionally fragment the address space by creating + * two alternating lists. Free both lists, one as dirty the other as clean. + * Try to allocate double the previous size with matching min_page_size. The + * allocation should never fail as it calls the force_merge. Also check that + * the page is always dirty after force_merge. Free the page as dirty, then + * repeat the whole thing, increment the order until we hit the max_order. 
+ */ + + i = 0; + n_pages = mm_size / ps; + do { + struct list_head *list; + int slot = i % 2; + + if (slot == 0) + list = &dirty; + else + list = &clean; + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, 0), + "buddy_alloc hit an error size=%lu\n", ps); + } while (++i < n_pages); + + drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); + drm_buddy_free_list(&mm, &dirty, 0); + + order = 1; + do { + size = SZ_4K << order; + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + size, size, &allocated, + DRM_BUDDY_CLEAR_ALLOCATION), + "buddy_alloc hit an error size=%u\n", size); + total = 0; + list_for_each_entry(block, &allocated, link) { + if (size != mm_size) + KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); + total += drm_buddy_block_size(&mm, block); + } + KUNIT_EXPECT_EQ(test, total, size); + + drm_buddy_free_list(&mm, &allocated, 0); + } while (++order <= max_order); + + drm_buddy_fini(&mm); + + /* + * Create a new mm with a non power-of-two size. Allocate a random size from each + * root, free as cleared and then call fini. This will ensure the multi-root + * force merge during fini. 
+ */ + mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2)); + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + KUNIT_EXPECT_EQ(test, mm.max_order, max_order); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, + 4 * ps, ps, &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 4 * ps); + drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, + 2 * ps, ps, &allocated, + DRM_BUDDY_CLEAR_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 2 * ps); + drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size, + ps, ps, &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", ps); + drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_contiguous(struct kunit *test) +{ + const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; + unsigned long i, n_pages, total; + struct drm_buddy_block *block; + struct drm_buddy mm; + LIST_HEAD(left); + LIST_HEAD(middle); + LIST_HEAD(right); + LIST_HEAD(allocated); + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + /* + * Idea is to fragment the address space by alternating block + * allocations between three different lists; one for left, middle and + * right. We can then free a list to simulate fragmentation. In + * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, + * including the try_harder path. 
+ */ + + i = 0; + n_pages = mm_size / ps; + do { + struct list_head *list; + int slot = i % 3; + + if (slot == 0) + list = &left; + else if (slot == 1) + list = &middle; + else + list = &right; + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, 0), + "buddy_alloc hit an error size=%lu\n", + ps); + } while (++i < n_pages); + + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + + drm_buddy_free_list(&mm, &middle, 0); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 2 * ps); + + drm_buddy_free_list(&mm, &right, 0); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%lu\n", 3 * ps); + /* + * At this point we should have enough contiguous space for 2 blocks, + * however they are never buddies (since we freed middle and right) so + * will require the try_harder logic to find them. 
+ */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 2 * ps); + + drm_buddy_free_list(&mm, &left, 0); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%lu\n", 3 * ps); + + total = 0; + list_for_each_entry(block, &allocated, link) + total += drm_buddy_block_size(&mm, block); + + KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); + + drm_buddy_free_list(&mm, &allocated, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_pathological(struct kunit *test) +{ + u64 mm_size, size, start = 0; + struct drm_buddy_block *block; + const int max_order = 3; + unsigned long flags = 0; + int order, top; + struct drm_buddy mm; + LIST_HEAD(blocks); + LIST_HEAD(holes); + LIST_HEAD(tmp); + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. Free the largest block, then whittle down again. + * Eventually we will have a fully 50% fragmented mm. 
+ */ + + mm_size = SZ_4K << max_order; + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + "buddy_init failed\n"); + + KUNIT_EXPECT_EQ(test, mm.max_order, max_order); + + for (top = max_order; top; top--) { + /* Make room by freeing the largest allocated block */ + block = list_first_entry_or_null(&blocks, typeof(*block), link); + if (block) { + list_del(&block->link); + drm_buddy_free_block(&mm, block); + } + + for (order = top; order--;) { + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, + mm_size, size, size, + &tmp, flags), + "buddy_alloc hit -ENOMEM with order=%d, top=%d\n", + order, top); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_move_tail(&block->link, &blocks); + } + + /* There should be one final page for this sub-allocation */ + size = get_size(0, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc hit -ENOMEM for hole\n"); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_move_tail(&block->link, &holes); + + size = get_size(top, mm.chunk_size); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", + top, max_order); + } + + drm_buddy_free_list(&mm, &holes, 0); + + /* Nothing larger than blocks of chunk_size now available */ + for (order = 1; order <= max_order; order++) { + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + } + + list_splice_tail(&holes, &blocks); + drm_buddy_free_list(&mm, 
&blocks, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_pessimistic(struct kunit *test) +{ + u64 mm_size, size, start = 0; + struct drm_buddy_block *block, *bn; + const unsigned int max_order = 16; + unsigned long flags = 0; + struct drm_buddy mm; + unsigned int order; + LIST_HEAD(blocks); + LIST_HEAD(tmp); + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. + */ + + mm_size = SZ_4K << max_order; + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + "buddy_init failed\n"); + + KUNIT_EXPECT_EQ(test, mm.max_order, max_order); + + for (order = 0; order < max_order; order++) { + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc hit -ENOMEM with order=%d\n", + order); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_move_tail(&block->link, &blocks); + } + + /* And now the last remaining block available */ + size = get_size(0, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc hit -ENOMEM on final alloc\n"); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_move_tail(&block->link, &blocks); + + /* Should be completely full! 
*/ + for (order = max_order; order--;) { + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc unexpectedly succeeded, it should be full!"); + } + + block = list_last_entry(&blocks, typeof(*block), link); + list_del(&block->link); + drm_buddy_free_block(&mm, block); + + /* As we free in increasing size, we make available larger blocks */ + order = 1; + list_for_each_entry_safe(block, bn, &blocks, link) { + list_del(&block->link); + drm_buddy_free_block(&mm, block); + + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc hit -ENOMEM with order=%d\n", + order); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_del(&block->link); + drm_buddy_free_block(&mm, block); + order++; + } + + /* To confirm, now the whole mm should be available */ + size = get_size(max_order, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + max_order); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_del(&block->link); + drm_buddy_free_block(&mm, block); + drm_buddy_free_list(&mm, &blocks, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_optimistic(struct kunit *test) +{ + u64 mm_size, size, start = 0; + struct drm_buddy_block *block; + unsigned long flags = 0; + const int max_order = 16; + struct drm_buddy mm; + LIST_HEAD(blocks); + LIST_HEAD(tmp); + int order; + + /* + * Create a mm with one block of each order available, and + * try to allocate them all. 
+ */ + + mm_size = SZ_4K * ((1 << (max_order + 1)) - 1); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + "buddy_init failed\n"); + + KUNIT_EXPECT_EQ(test, mm.max_order, max_order); + + for (order = 0; order <= max_order; order++) { + size = get_size(order, mm.chunk_size); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc hit -ENOMEM with order=%d\n", + order); + + block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); + + list_move_tail(&block->link, &blocks); + } + + /* Should be completely full! */ + size = get_size(0, mm.chunk_size); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + size, size, &tmp, flags), + "buddy_alloc unexpectedly succeeded, it should be full!"); + + drm_buddy_free_list(&mm, &blocks, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_limit(struct kunit *test) +{ + u64 size = U64_MAX, start = 0; + struct drm_buddy_block *block; + unsigned long flags = 0; + LIST_HEAD(allocated); + struct drm_buddy mm; + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K)); + + KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER, + "mm.max_order(%d) != %d\n", mm.max_order, + DRM_BUDDY_MAX_ORDER); + + size = mm.chunk_size << mm.max_order; + KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size, + mm.chunk_size, &allocated, flags)); + + block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link); + KUNIT_EXPECT_TRUE(test, block); + + KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order, + "block order(%d) != %d\n", + drm_buddy_block_order(block), mm.max_order); + + KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * mm.chunk_size, + "block size(%llu) != %llu\n", + drm_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * mm.chunk_size); + + 
drm_buddy_free_list(&mm, &allocated, 0); + drm_buddy_fini(&mm); +} + +static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test) +{ + u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G; + struct drm_buddy mm; + LIST_HEAD(blocks); + int err; + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + "buddy_init failed\n"); + + /* CONTIGUOUS allocation should succeed via try_harder fallback */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size, + SZ_4K, &blocks, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%llu\n", size); + drm_buddy_free_list(&mm, &blocks, 0); + + /* Non-CONTIGUOUS with large min_block_size should return -EINVAL */ + err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0); + KUNIT_EXPECT_EQ(test, err, -EINVAL); + + /* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */ + err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, + DRM_BUDDY_RANGE_ALLOCATION); + KUNIT_EXPECT_EQ(test, err, -EINVAL); + + /* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */ + err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, + DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION); + KUNIT_EXPECT_EQ(test, err, -EINVAL); + + drm_buddy_fini(&mm); +} + +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + random_seed); + + return 0; +} + +static struct kunit_case drm_buddy_tests[] = { + KUNIT_CASE(drm_test_buddy_alloc_limit), + KUNIT_CASE(drm_test_buddy_alloc_optimistic), + KUNIT_CASE(drm_test_buddy_alloc_pessimistic), + KUNIT_CASE(drm_test_buddy_alloc_pathological), + KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_clear), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), + 
KUNIT_CASE(drm_test_buddy_fragmentation_performance), + KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order), + {} +}; + +static struct kunit_suite drm_buddy_test_suite = { + .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, + .test_cases = drm_buddy_tests, +}; + +kunit_test_suite(drm_buddy_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kunit test for drm_buddy functions"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/tests/gpu_random.c b/drivers/gpu/tests/gpu_random.c new file mode 100644 index 000000000000..ddd1f594b5d5 --- /dev/null +++ b/drivers/gpu/tests/gpu_random.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include "gpu_random.h" + +u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} +EXPORT_SYMBOL(drm_prandom_u32_max_state); + +void drm_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + for (i = 0; i < count; ++i) { + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + j = drm_prandom_u32_max_state(count, state); + swap(order[i], order[j]); + } +} +EXPORT_SYMBOL(drm_random_reorder); + +unsigned int *drm_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_KERNEL); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + drm_random_reorder(order, count, state); + return order; +} +EXPORT_SYMBOL(drm_random_order); diff --git a/drivers/gpu/tests/gpu_random.h b/drivers/gpu/tests/gpu_random.h new file mode 100644 index 000000000000..9f827260a89d --- /dev/null +++ b/drivers/gpu/tests/gpu_random.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DRM_RANDOM_H__ +#define __DRM_RANDOM_H__ + +/* This is a temporary home for a couple of utility functions that should + * be transposed to 
lib/ at the earliest convenience. + */ + +#include + +#define DRM_RND_STATE_INITIALIZER(seed__) ({ \ + struct rnd_state state__; \ + prandom_seed_state(&state__, (seed__)); \ + state__; \ +}) + +#define DRM_RND_STATE(name__, seed__) \ + struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__) + +unsigned int *drm_random_order(unsigned int count, + struct rnd_state *state); +void drm_random_reorder(unsigned int *order, + unsigned int count, + struct rnd_state *state); +u32 drm_prandom_u32_max_state(u32 ep_ro, + struct rnd_state *state); + +#endif /* !__DRM_RANDOM_H__ */ diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h deleted file mode 100644 index b909fa8f810a..000000000000 --- a/include/drm/drm_buddy.h +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2021 Intel Corporation - */ - -#ifndef __DRM_BUDDY_H__ -#define __DRM_BUDDY_H__ - -#include -#include -#include -#include -#include - -struct drm_printer; - -#define DRM_BUDDY_RANGE_ALLOCATION BIT(0) -#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) -#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) -#define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) -#define DRM_BUDDY_CLEARED BIT(4) -#define DRM_BUDDY_TRIM_DISABLE BIT(5) - -struct drm_buddy_block { -#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) -#define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) -#define DRM_BUDDY_ALLOCATED (1 << 10) -#define DRM_BUDDY_FREE (2 << 10) -#define DRM_BUDDY_SPLIT (3 << 10) -#define DRM_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) -/* Free to be used, if needed in the future */ -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) -#define DRM_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) - u64 header; - - struct drm_buddy_block *left; - struct drm_buddy_block *right; - struct drm_buddy_block *parent; - - void *private; /* owned by creator */ - - /* - * While the block is allocated by the user through drm_buddy_alloc*, - * the user has ownership of the link, for example to maintain within - * a list, if so desired. 
As soon as the block is freed with - * drm_buddy_free* ownership is given back to the mm. - */ - union { - struct rb_node rb; - struct list_head link; - }; - - struct list_head tmp_link; -}; - -/* Order-zero must be at least SZ_4K */ -#define DRM_BUDDY_MAX_ORDER (63 - 12) - -/* - * Binary Buddy System. - * - * Locking should be handled by the user, a simple mutex around - * drm_buddy_alloc* and drm_buddy_free* should suffice. - */ -struct drm_buddy { - /* Maintain a free list for each order. */ - struct rb_root **free_trees; - - /* - * Maintain explicit binary tree(s) to track the allocation of the - * address space. This gives us a simple way of finding a buddy block - * and performing the potentially recursive merge step when freeing a - * block. Nodes are either allocated or free, in which case they will - * also exist on the respective free list. - */ - struct drm_buddy_block **roots; - - /* - * Anything from here is public, and remains static for the lifetime of - * the mm. Everything above is considered do-not-touch. 
- */ - unsigned int n_roots; - unsigned int max_order; - - /* Must be at least SZ_4K */ - u64 chunk_size; - u64 size; - u64 avail; - u64 clear_avail; -}; - -static inline u64 -drm_buddy_block_offset(const struct drm_buddy_block *block) -{ - return block->header & DRM_BUDDY_HEADER_OFFSET; -} - -static inline unsigned int -drm_buddy_block_order(struct drm_buddy_block *block) -{ - return block->header & DRM_BUDDY_HEADER_ORDER; -} - -static inline unsigned int -drm_buddy_block_state(struct drm_buddy_block *block) -{ - return block->header & DRM_BUDDY_HEADER_STATE; -} - -static inline bool -drm_buddy_block_is_allocated(struct drm_buddy_block *block) -{ - return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED; -} - -static inline bool -drm_buddy_block_is_clear(struct drm_buddy_block *block) -{ - return block->header & DRM_BUDDY_HEADER_CLEAR; -} - -static inline bool -drm_buddy_block_is_free(struct drm_buddy_block *block) -{ - return drm_buddy_block_state(block) == DRM_BUDDY_FREE; -} - -static inline bool -drm_buddy_block_is_split(struct drm_buddy_block *block) -{ - return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT; -} - -static inline u64 -drm_buddy_block_size(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - return mm->chunk_size << drm_buddy_block_order(block); -} - -int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); - -void drm_buddy_fini(struct drm_buddy *mm); - -struct drm_buddy_block * -drm_get_buddy(struct drm_buddy_block *block); - -int drm_buddy_alloc_blocks(struct drm_buddy *mm, - u64 start, u64 end, u64 size, - u64 min_page_size, - struct list_head *blocks, - unsigned long flags); - -int drm_buddy_block_trim(struct drm_buddy *mm, - u64 *start, - u64 new_size, - struct list_head *blocks); - -void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear); - -void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); - -void drm_buddy_free_list(struct drm_buddy *mm, - struct list_head *objects, - unsigned 
int flags); - -void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p); -void drm_buddy_block_print(struct drm_buddy *mm, - struct drm_buddy_block *block, - struct drm_printer *p); -#endif diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h new file mode 100644 index 000000000000..b909fa8f810a --- /dev/null +++ b/include/linux/gpu_buddy.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __DRM_BUDDY_H__ +#define __DRM_BUDDY_H__ + +#include +#include +#include +#include +#include + +struct drm_printer; + +#define DRM_BUDDY_RANGE_ALLOCATION BIT(0) +#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) +#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) +#define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) +#define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) + +struct drm_buddy_block { +#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define DRM_BUDDY_ALLOCATED (1 << 10) +#define DRM_BUDDY_FREE (2 << 10) +#define DRM_BUDDY_SPLIT (3 << 10) +#define DRM_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) +/* Free to be used, if needed in the future */ +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) +#define DRM_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct drm_buddy_block *left; + struct drm_buddy_block *right; + struct drm_buddy_block *parent; + + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through drm_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * drm_buddy_free* ownership is given back to the mm. + */ + union { + struct rb_node rb; + struct list_head link; + }; + + struct list_head tmp_link; +}; + +/* Order-zero must be at least SZ_4K */ +#define DRM_BUDDY_MAX_ORDER (63 - 12) + +/* + * Binary Buddy System. 
+ * + * Locking should be handled by the user, a simple mutex around + * drm_buddy_alloc* and drm_buddy_free* should suffice. + */ +struct drm_buddy { + /* Maintain a free list for each order. */ + struct rb_root **free_trees; + + /* + * Maintain explicit binary tree(s) to track the allocation of the + * address space. This gives us a simple way of finding a buddy block + * and performing the potentially recursive merge step when freeing a + * block. Nodes are either allocated or free, in which case they will + * also exist on the respective free list. + */ + struct drm_buddy_block **roots; + + /* + * Anything from here is public, and remains static for the lifetime of + * the mm. Everything above is considered do-not-touch. + */ + unsigned int n_roots; + unsigned int max_order; + + /* Must be at least SZ_4K */ + u64 chunk_size; + u64 size; + u64 avail; + u64 clear_avail; +}; + +static inline u64 +drm_buddy_block_offset(const struct drm_buddy_block *block) +{ + return block->header & DRM_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +drm_buddy_block_order(struct drm_buddy_block *block) +{ + return block->header & DRM_BUDDY_HEADER_ORDER; +} + +static inline unsigned int +drm_buddy_block_state(struct drm_buddy_block *block) +{ + return block->header & DRM_BUDDY_HEADER_STATE; +} + +static inline bool +drm_buddy_block_is_allocated(struct drm_buddy_block *block) +{ + return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED; +} + +static inline bool +drm_buddy_block_is_clear(struct drm_buddy_block *block) +{ + return block->header & DRM_BUDDY_HEADER_CLEAR; +} + +static inline bool +drm_buddy_block_is_free(struct drm_buddy_block *block) +{ + return drm_buddy_block_state(block) == DRM_BUDDY_FREE; +} + +static inline bool +drm_buddy_block_is_split(struct drm_buddy_block *block) +{ + return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT; +} + +static inline u64 +drm_buddy_block_size(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + return mm->chunk_size 
<< drm_buddy_block_order(block); +} + +int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); + +void drm_buddy_fini(struct drm_buddy *mm); + +struct drm_buddy_block * +drm_get_buddy(struct drm_buddy_block *block); + +int drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags); + +int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, + u64 new_size, + struct list_head *blocks); + +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear); + +void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); + +void drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + unsigned int flags); + +void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p); +void drm_buddy_block_print(struct drm_buddy *mm, + struct drm_buddy_block *block, + struct drm_printer *p); +#endif -- cgit v1.2.3 From ba110db8e1bc206c13fd7d985e79b033f53bfdea Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 6 Feb 2026 08:52:38 +1000 Subject: gpu: Move DRM buddy allocator one level up (part two) Move the DRM buddy allocator one level up so that it can be used by GPU drivers (example, nova-core) that have usecases other than DRM (such as VFIO vGPU support). Modify the API, structures and Kconfigs to use "gpu_buddy" terminology. Adapt the drivers and tests to use the new API. The commit cannot be split due to bisectability, however no functional change is intended. Verified by running K-UNIT tests and build tested various configurations. Signed-off-by: Joel Fernandes Reviewed-by: Dave Airlie [airlied: I've split this into two so git can find copies easier. I've also just nuked drm_random library, that stuff needs to be done elsewhere and only the buddy tests seem to be using it]. 
Signed-off-by: Dave Airlie --- Documentation/gpu/drm-mm.rst | 6 + MAINTAINERS | 8 +- drivers/gpu/Kconfig | 13 + drivers/gpu/Makefile | 1 + drivers/gpu/buddy.c | 556 ++++++++++----------- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 79 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 18 +- drivers/gpu/drm/drm_buddy.c | 77 +++ drivers/gpu/drm/i915/i915_scatterlist.c | 8 +- drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 55 +- drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 4 +- .../gpu/drm/i915/selftests/intel_memory_region.c | 20 +- drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c | 4 +- drivers/gpu/drm/ttm/tests/ttm_mock_manager.c | 18 +- drivers/gpu/drm/ttm/tests/ttm_mock_manager.h | 2 +- drivers/gpu/drm/xe/xe_res_cursor.h | 34 +- drivers/gpu/drm/xe/xe_svm.c | 12 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 71 +-- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 +- drivers/gpu/tests/Makefile | 2 +- drivers/gpu/tests/gpu_buddy_test.c | 412 +++++++-------- drivers/gpu/tests/gpu_random.c | 16 +- drivers/gpu/tests/gpu_random.h | 18 +- drivers/video/Kconfig | 1 + include/drm/drm_buddy.h | 18 + include/linux/gpu_buddy.h | 124 ++--- 30 files changed, 855 insertions(+), 741 deletions(-) create mode 100644 drivers/gpu/Kconfig create mode 100644 drivers/gpu/drm/drm_buddy.c create mode 100644 include/drm/drm_buddy.h (limited to 'include') diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index ceee0e663237..32fb506db05b 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -532,6 +532,12 @@ Buddy Allocator Function References (GPU buddy) .. kernel-doc:: drivers/gpu/buddy.c :export: +DRM Buddy Specific Logging Function References +---------------------------------------------- + +.. 
kernel-doc:: drivers/gpu/drm/drm_buddy.c + :export: + DRM Cache Handling and Fast WC memcpy() ======================================= diff --git a/MAINTAINERS b/MAINTAINERS index 086cbf5c36b3..f2bec2c0d7e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8797,15 +8797,17 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/ttm/ F: include/drm/ttm/ -DRM BUDDY ALLOCATOR +GPU BUDDY ALLOCATOR M: Matthew Auld M: Arun Pravin R: Christian Koenig L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git -F: drivers/gpu/drm/drm_buddy.c -F: drivers/gpu/drm/tests/drm_buddy_test.c +F: drivers/gpu/drm/drm_buddy.c +F: drivers/gpu/buddy.c +F: drivers/gpu/tests/gpu_buddy_test.c +F: include/linux/gpu_buddy.h F: include/drm/drm_buddy.h DRM AUTOMATED TESTING diff --git a/drivers/gpu/Kconfig b/drivers/gpu/Kconfig new file mode 100644 index 000000000000..ebb2ad4b7ea0 --- /dev/null +++ b/drivers/gpu/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +config GPU_BUDDY + bool + help + A page based buddy allocator for GPU memory. + +config GPU_BUDDY_KUNIT_TEST + tristate "KUnit tests for GPU buddy allocator" if !KUNIT_ALL_TESTS + depends on GPU_BUDDY && KUNIT + default KUNIT_ALL_TESTS + help + KUnit tests for the GPU buddy allocator. 
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index c5292ee2c852..5cd54d06e262 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -6,3 +6,4 @@ obj-y += host1x/ drm/ vga/ tests/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ obj-$(CONFIG_NOVA_CORE) += nova-core/ +obj-$(CONFIG_GPU_BUDDY) += buddy.o diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c index 4cc63d961d26..603c59a2013a 100644 --- a/drivers/gpu/buddy.c +++ b/drivers/gpu/buddy.c @@ -11,27 +11,17 @@ #include #include -#include - -enum drm_buddy_free_tree { - DRM_BUDDY_CLEAR_TREE = 0, - DRM_BUDDY_DIRTY_TREE, - DRM_BUDDY_MAX_FREE_TREES, -}; static struct kmem_cache *slab_blocks; -#define for_each_free_tree(tree) \ - for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++) - -static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm, - struct drm_buddy_block *parent, +static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm, + struct gpu_buddy_block *parent, unsigned int order, u64 offset) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; - BUG_ON(order > DRM_BUDDY_MAX_ORDER); + BUG_ON(order > GPU_BUDDY_MAX_ORDER); block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); if (!block) @@ -43,30 +33,30 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm, RB_CLEAR_NODE(&block->rb); - BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED); + BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED); return block; } -static void drm_block_free(struct drm_buddy *mm, - struct drm_buddy_block *block) +static void gpu_block_free(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { kmem_cache_free(slab_blocks, block); } -static enum drm_buddy_free_tree -get_block_tree(struct drm_buddy_block *block) +static enum gpu_buddy_free_tree +get_block_tree(struct gpu_buddy_block *block) { - return drm_buddy_block_is_clear(block) ? - DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; + return gpu_buddy_block_is_clear(block) ? 
+ GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; } -static struct drm_buddy_block * +static struct gpu_buddy_block * rbtree_get_free_block(const struct rb_node *node) { - return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL; + return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL; } -static struct drm_buddy_block * +static struct gpu_buddy_block * rbtree_last_free_block(struct rb_root *root) { return rbtree_get_free_block(rb_last(root)); @@ -77,33 +67,33 @@ static bool rbtree_is_empty(struct rb_root *root) return RB_EMPTY_ROOT(root); } -static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block, - const struct drm_buddy_block *node) +static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block, + const struct gpu_buddy_block *node) { - return drm_buddy_block_offset(block) < drm_buddy_block_offset(node); + return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node); } static bool rbtree_block_offset_less(struct rb_node *block, const struct rb_node *node) { - return drm_buddy_block_offset_less(rbtree_get_free_block(block), + return gpu_buddy_block_offset_less(rbtree_get_free_block(block), rbtree_get_free_block(node)); } -static void rbtree_insert(struct drm_buddy *mm, - struct drm_buddy_block *block, - enum drm_buddy_free_tree tree) +static void rbtree_insert(struct gpu_buddy *mm, + struct gpu_buddy_block *block, + enum gpu_buddy_free_tree tree) { rb_add(&block->rb, - &mm->free_trees[tree][drm_buddy_block_order(block)], + &mm->free_trees[tree][gpu_buddy_block_order(block)], rbtree_block_offset_less); } -static void rbtree_remove(struct drm_buddy *mm, - struct drm_buddy_block *block) +static void rbtree_remove(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - unsigned int order = drm_buddy_block_order(block); - enum drm_buddy_free_tree tree; + unsigned int order = gpu_buddy_block_order(block); + enum gpu_buddy_free_tree tree; struct rb_root *root; tree = get_block_tree(block); @@ -113,42 +103,42 @@ 
static void rbtree_remove(struct drm_buddy *mm, RB_CLEAR_NODE(&block->rb); } -static void clear_reset(struct drm_buddy_block *block) +static void clear_reset(struct gpu_buddy_block *block) { - block->header &= ~DRM_BUDDY_HEADER_CLEAR; + block->header &= ~GPU_BUDDY_HEADER_CLEAR; } -static void mark_cleared(struct drm_buddy_block *block) +static void mark_cleared(struct gpu_buddy_block *block) { - block->header |= DRM_BUDDY_HEADER_CLEAR; + block->header |= GPU_BUDDY_HEADER_CLEAR; } -static void mark_allocated(struct drm_buddy *mm, - struct drm_buddy_block *block) +static void mark_allocated(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_ALLOCATED; + block->header &= ~GPU_BUDDY_HEADER_STATE; + block->header |= GPU_BUDDY_ALLOCATED; rbtree_remove(mm, block); } -static void mark_free(struct drm_buddy *mm, - struct drm_buddy_block *block) +static void mark_free(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - enum drm_buddy_free_tree tree; + enum gpu_buddy_free_tree tree; - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_FREE; + block->header &= ~GPU_BUDDY_HEADER_STATE; + block->header |= GPU_BUDDY_FREE; tree = get_block_tree(block); rbtree_insert(mm, block, tree); } -static void mark_split(struct drm_buddy *mm, - struct drm_buddy_block *block) +static void mark_split(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - block->header &= ~DRM_BUDDY_HEADER_STATE; - block->header |= DRM_BUDDY_SPLIT; + block->header &= ~GPU_BUDDY_HEADER_STATE; + block->header |= GPU_BUDDY_SPLIT; rbtree_remove(mm, block); } @@ -163,10 +153,10 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) return s1 <= s2 && e1 >= e2; } -static struct drm_buddy_block * -__get_buddy(struct drm_buddy_block *block) +static struct gpu_buddy_block * +__get_buddy(struct gpu_buddy_block *block) { - struct drm_buddy_block *parent; + struct gpu_buddy_block *parent; parent = 
block->parent; if (!parent) @@ -178,19 +168,19 @@ __get_buddy(struct drm_buddy_block *block) return parent->left; } -static unsigned int __drm_buddy_free(struct drm_buddy *mm, - struct drm_buddy_block *block, +static unsigned int __gpu_buddy_free(struct gpu_buddy *mm, + struct gpu_buddy_block *block, bool force_merge) { - struct drm_buddy_block *parent; + struct gpu_buddy_block *parent; unsigned int order; while ((parent = block->parent)) { - struct drm_buddy_block *buddy; + struct gpu_buddy_block *buddy; buddy = __get_buddy(block); - if (!drm_buddy_block_is_free(buddy)) + if (!gpu_buddy_block_is_free(buddy)) break; if (!force_merge) { @@ -198,31 +188,31 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm, * Check the block and its buddy clear state and exit * the loop if they both have the dissimilar state. */ - if (drm_buddy_block_is_clear(block) != - drm_buddy_block_is_clear(buddy)) + if (gpu_buddy_block_is_clear(block) != + gpu_buddy_block_is_clear(buddy)) break; - if (drm_buddy_block_is_clear(block)) + if (gpu_buddy_block_is_clear(block)) mark_cleared(parent); } rbtree_remove(mm, buddy); - if (force_merge && drm_buddy_block_is_clear(buddy)) - mm->clear_avail -= drm_buddy_block_size(mm, buddy); + if (force_merge && gpu_buddy_block_is_clear(buddy)) + mm->clear_avail -= gpu_buddy_block_size(mm, buddy); - drm_block_free(mm, block); - drm_block_free(mm, buddy); + gpu_block_free(mm, block); + gpu_block_free(mm, buddy); block = parent; } - order = drm_buddy_block_order(block); + order = gpu_buddy_block_order(block); mark_free(mm, block); return order; } -static int __force_merge(struct drm_buddy *mm, +static int __force_merge(struct gpu_buddy *mm, u64 start, u64 end, unsigned int min_order) @@ -241,7 +231,7 @@ static int __force_merge(struct drm_buddy *mm, struct rb_node *iter = rb_last(&mm->free_trees[tree][i]); while (iter) { - struct drm_buddy_block *block, *buddy; + struct gpu_buddy_block *block, *buddy; u64 block_start, block_end; block = 
rbtree_get_free_block(iter); @@ -250,18 +240,18 @@ static int __force_merge(struct drm_buddy *mm, if (!block || !block->parent) continue; - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; + block_start = gpu_buddy_block_offset(block); + block_end = block_start + gpu_buddy_block_size(mm, block) - 1; if (!contains(start, end, block_start, block_end)) continue; buddy = __get_buddy(block); - if (!drm_buddy_block_is_free(buddy)) + if (!gpu_buddy_block_is_free(buddy)) continue; - WARN_ON(drm_buddy_block_is_clear(block) == - drm_buddy_block_is_clear(buddy)); + WARN_ON(gpu_buddy_block_is_clear(block) == + gpu_buddy_block_is_clear(buddy)); /* * Advance to the next node when the current node is the buddy, @@ -271,10 +261,10 @@ static int __force_merge(struct drm_buddy *mm, iter = rb_prev(iter); rbtree_remove(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail -= gpu_buddy_block_size(mm, block); - order = __drm_buddy_free(mm, block, true); + order = __gpu_buddy_free(mm, block, true); if (order >= min_order) return 0; } @@ -285,9 +275,9 @@ static int __force_merge(struct drm_buddy *mm, } /** - * drm_buddy_init - init memory manager + * gpu_buddy_init - init memory manager * - * @mm: DRM buddy manager to initialize + * @mm: GPU buddy manager to initialize * @size: size in bytes to manage * @chunk_size: minimum page size in bytes for our allocations * @@ -296,7 +286,7 @@ static int __force_merge(struct drm_buddy *mm, * Returns: * 0 on success, error code on failure. 
*/ -int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) +int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size) { unsigned int i, j, root_count = 0; u64 offset = 0; @@ -318,9 +308,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) mm->chunk_size = chunk_size; mm->max_order = ilog2(size) - ilog2(chunk_size); - BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER); + BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER); - mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES, + mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES, sizeof(*mm->free_trees), GFP_KERNEL); if (!mm->free_trees) @@ -340,7 +330,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) mm->n_roots = hweight64(size); mm->roots = kmalloc_array(mm->n_roots, - sizeof(struct drm_buddy_block *), + sizeof(struct gpu_buddy_block *), GFP_KERNEL); if (!mm->roots) goto out_free_tree; @@ -350,21 +340,21 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) * not itself a power-of-two. 
*/ do { - struct drm_buddy_block *root; + struct gpu_buddy_block *root; unsigned int order; u64 root_size; order = ilog2(size) - ilog2(chunk_size); root_size = chunk_size << order; - root = drm_block_alloc(mm, NULL, order, offset); + root = gpu_block_alloc(mm, NULL, order, offset); if (!root) goto out_free_roots; mark_free(mm, root); BUG_ON(root_count > mm->max_order); - BUG_ON(drm_buddy_block_size(mm, root) < chunk_size); + BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size); mm->roots[root_count] = root; @@ -377,7 +367,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) out_free_roots: while (root_count--) - drm_block_free(mm, mm->roots[root_count]); + gpu_block_free(mm, mm->roots[root_count]); kfree(mm->roots); out_free_tree: while (i--) @@ -385,16 +375,16 @@ out_free_tree: kfree(mm->free_trees); return -ENOMEM; } -EXPORT_SYMBOL(drm_buddy_init); +EXPORT_SYMBOL(gpu_buddy_init); /** - * drm_buddy_fini - tear down the memory manager + * gpu_buddy_fini - tear down the memory manager * - * @mm: DRM buddy manager to free + * @mm: GPU buddy manager to free * * Cleanup memory manager resources and the freetree */ -void drm_buddy_fini(struct drm_buddy *mm) +void gpu_buddy_fini(struct gpu_buddy *mm) { u64 root_size, size, start; unsigned int order; @@ -404,13 +394,13 @@ void drm_buddy_fini(struct drm_buddy *mm) for (i = 0; i < mm->n_roots; ++i) { order = ilog2(size) - ilog2(mm->chunk_size); - start = drm_buddy_block_offset(mm->roots[i]); + start = gpu_buddy_block_offset(mm->roots[i]); __force_merge(mm, start, start + size, order); - if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i]))) + if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i]))) kunit_fail_current_test("buddy_fini() root"); - drm_block_free(mm, mm->roots[i]); + gpu_block_free(mm, mm->roots[i]); root_size = mm->chunk_size << order; size -= root_size; @@ -423,31 +413,31 @@ void drm_buddy_fini(struct drm_buddy *mm) kfree(mm->free_trees); kfree(mm->roots); } -EXPORT_SYMBOL(drm_buddy_fini); 
+EXPORT_SYMBOL(gpu_buddy_fini); -static int split_block(struct drm_buddy *mm, - struct drm_buddy_block *block) +static int split_block(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - unsigned int block_order = drm_buddy_block_order(block) - 1; - u64 offset = drm_buddy_block_offset(block); + unsigned int block_order = gpu_buddy_block_order(block) - 1; + u64 offset = gpu_buddy_block_offset(block); - BUG_ON(!drm_buddy_block_is_free(block)); - BUG_ON(!drm_buddy_block_order(block)); + BUG_ON(!gpu_buddy_block_is_free(block)); + BUG_ON(!gpu_buddy_block_order(block)); - block->left = drm_block_alloc(mm, block, block_order, offset); + block->left = gpu_block_alloc(mm, block, block_order, offset); if (!block->left) return -ENOMEM; - block->right = drm_block_alloc(mm, block, block_order, + block->right = gpu_block_alloc(mm, block, block_order, offset + (mm->chunk_size << block_order)); if (!block->right) { - drm_block_free(mm, block->left); + gpu_block_free(mm, block->left); return -ENOMEM; } mark_split(mm, block); - if (drm_buddy_block_is_clear(block)) { + if (gpu_buddy_block_is_clear(block)) { mark_cleared(block->left); mark_cleared(block->right); clear_reset(block); @@ -460,34 +450,34 @@ static int split_block(struct drm_buddy *mm, } /** - * drm_get_buddy - get buddy address + * gpu_get_buddy - get buddy address * - * @block: DRM buddy block + * @block: GPU buddy block * * Returns the corresponding buddy block for @block, or NULL * if this is a root block and can't be merged further. * Requires some kind of locking to protect against * any concurrent allocate and free operations. 
*/ -struct drm_buddy_block * -drm_get_buddy(struct drm_buddy_block *block) +struct gpu_buddy_block * +gpu_get_buddy(struct gpu_buddy_block *block) { return __get_buddy(block); } -EXPORT_SYMBOL(drm_get_buddy); +EXPORT_SYMBOL(gpu_get_buddy); /** - * drm_buddy_reset_clear - reset blocks clear state + * gpu_buddy_reset_clear - reset blocks clear state * - * @mm: DRM buddy manager + * @mm: GPU buddy manager * @is_clear: blocks clear state * * Reset the clear state based on @is_clear value for each block * in the freetree. */ -void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) +void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear) { - enum drm_buddy_free_tree src_tree, dst_tree; + enum gpu_buddy_free_tree src_tree, dst_tree; u64 root_size, size, start; unsigned int order; int i; @@ -495,60 +485,60 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) size = mm->size; for (i = 0; i < mm->n_roots; ++i) { order = ilog2(size) - ilog2(mm->chunk_size); - start = drm_buddy_block_offset(mm->roots[i]); + start = gpu_buddy_block_offset(mm->roots[i]); __force_merge(mm, start, start + size, order); root_size = mm->chunk_size << order; size -= root_size; } - src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; - dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; + src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE; + dst_tree = is_clear ? 
GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; for (i = 0; i <= mm->max_order; ++i) { struct rb_root *root = &mm->free_trees[src_tree][i]; - struct drm_buddy_block *block, *tmp; + struct gpu_buddy_block *block, *tmp; rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { rbtree_remove(mm, block); if (is_clear) { mark_cleared(block); - mm->clear_avail += drm_buddy_block_size(mm, block); + mm->clear_avail += gpu_buddy_block_size(mm, block); } else { clear_reset(block); - mm->clear_avail -= drm_buddy_block_size(mm, block); + mm->clear_avail -= gpu_buddy_block_size(mm, block); } rbtree_insert(mm, block, dst_tree); } } } -EXPORT_SYMBOL(drm_buddy_reset_clear); +EXPORT_SYMBOL(gpu_buddy_reset_clear); /** - * drm_buddy_free_block - free a block + * gpu_buddy_free_block - free a block * - * @mm: DRM buddy manager + * @mm: GPU buddy manager * @block: block to be freed */ -void drm_buddy_free_block(struct drm_buddy *mm, - struct drm_buddy_block *block) +void gpu_buddy_free_block(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - BUG_ON(!drm_buddy_block_is_allocated(block)); - mm->avail += drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail += drm_buddy_block_size(mm, block); + BUG_ON(!gpu_buddy_block_is_allocated(block)); + mm->avail += gpu_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail += gpu_buddy_block_size(mm, block); - __drm_buddy_free(mm, block, false); + __gpu_buddy_free(mm, block, false); } -EXPORT_SYMBOL(drm_buddy_free_block); +EXPORT_SYMBOL(gpu_buddy_free_block); -static void __drm_buddy_free_list(struct drm_buddy *mm, +static void __gpu_buddy_free_list(struct gpu_buddy *mm, struct list_head *objects, bool mark_clear, bool mark_dirty) { - struct drm_buddy_block *block, *on; + struct gpu_buddy_block *block, *on; WARN_ON(mark_dirty && mark_clear); @@ -557,13 +547,13 @@ static void __drm_buddy_free_list(struct drm_buddy *mm, mark_cleared(block); else if (mark_dirty) 
clear_reset(block); - drm_buddy_free_block(mm, block); + gpu_buddy_free_block(mm, block); cond_resched(); } INIT_LIST_HEAD(objects); } -static void drm_buddy_free_list_internal(struct drm_buddy *mm, +static void gpu_buddy_free_list_internal(struct gpu_buddy *mm, struct list_head *objects) { /* @@ -571,43 +561,43 @@ static void drm_buddy_free_list_internal(struct drm_buddy *mm, * at this point. For example we might have just failed part of the * allocation. */ - __drm_buddy_free_list(mm, objects, false, false); + __gpu_buddy_free_list(mm, objects, false, false); } /** - * drm_buddy_free_list - free blocks + * gpu_buddy_free_list - free blocks * - * @mm: DRM buddy manager + * @mm: GPU buddy manager * @objects: input list head to free blocks - * @flags: optional flags like DRM_BUDDY_CLEARED + * @flags: optional flags like GPU_BUDDY_CLEARED */ -void drm_buddy_free_list(struct drm_buddy *mm, +void gpu_buddy_free_list(struct gpu_buddy *mm, struct list_head *objects, unsigned int flags) { - bool mark_clear = flags & DRM_BUDDY_CLEARED; + bool mark_clear = flags & GPU_BUDDY_CLEARED; - __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear); + __gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear); } -EXPORT_SYMBOL(drm_buddy_free_list); +EXPORT_SYMBOL(gpu_buddy_free_list); -static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags) +static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags) { - bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION; + bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION; - return needs_clear != drm_buddy_block_is_clear(block); + return needs_clear != gpu_buddy_block_is_clear(block); } -static struct drm_buddy_block * -__alloc_range_bias(struct drm_buddy *mm, +static struct gpu_buddy_block * +__alloc_range_bias(struct gpu_buddy *mm, u64 start, u64 end, unsigned int order, unsigned long flags, bool fallback) { u64 req_size = mm->chunk_size << order; - struct drm_buddy_block *block; - 
struct drm_buddy_block *buddy; + struct gpu_buddy_block *block; + struct gpu_buddy_block *buddy; LIST_HEAD(dfs); int err; int i; @@ -622,23 +612,23 @@ __alloc_range_bias(struct drm_buddy *mm, u64 block_end; block = list_first_entry_or_null(&dfs, - struct drm_buddy_block, + struct gpu_buddy_block, tmp_link); if (!block) break; list_del(&block->tmp_link); - if (drm_buddy_block_order(block) < order) + if (gpu_buddy_block_order(block) < order) continue; - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; + block_start = gpu_buddy_block_offset(block); + block_end = block_start + gpu_buddy_block_size(mm, block) - 1; if (!overlaps(start, end, block_start, block_end)) continue; - if (drm_buddy_block_is_allocated(block)) + if (gpu_buddy_block_is_allocated(block)) continue; if (block_start < start || block_end > end) { @@ -654,17 +644,17 @@ __alloc_range_bias(struct drm_buddy *mm, continue; if (contains(start, end, block_start, block_end) && - order == drm_buddy_block_order(block)) { + order == gpu_buddy_block_order(block)) { /* * Find the free block within the range. 
*/ - if (drm_buddy_block_is_free(block)) + if (gpu_buddy_block_is_free(block)) return block; continue; } - if (!drm_buddy_block_is_split(block)) { + if (!gpu_buddy_block_is_split(block)) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; @@ -684,19 +674,19 @@ err_undo: */ buddy = __get_buddy(block); if (buddy && - (drm_buddy_block_is_free(block) && - drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block, false); + (gpu_buddy_block_is_free(block) && + gpu_buddy_block_is_free(buddy))) + __gpu_buddy_free(mm, block, false); return ERR_PTR(err); } -static struct drm_buddy_block * -__drm_buddy_alloc_range_bias(struct drm_buddy *mm, +static struct gpu_buddy_block * +__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm, u64 start, u64 end, unsigned int order, unsigned long flags) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; bool fallback = false; block = __alloc_range_bias(mm, start, end, order, @@ -708,12 +698,12 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm, return block; } -static struct drm_buddy_block * -get_maxblock(struct drm_buddy *mm, +static struct gpu_buddy_block * +get_maxblock(struct gpu_buddy *mm, unsigned int order, - enum drm_buddy_free_tree tree) + enum gpu_buddy_free_tree tree) { - struct drm_buddy_block *max_block = NULL, *block = NULL; + struct gpu_buddy_block *max_block = NULL, *block = NULL; struct rb_root *root; unsigned int i; @@ -728,8 +718,8 @@ get_maxblock(struct drm_buddy *mm, continue; } - if (drm_buddy_block_offset(block) > - drm_buddy_block_offset(max_block)) { + if (gpu_buddy_block_offset(block) > + gpu_buddy_block_offset(max_block)) { max_block = block; } } @@ -737,25 +727,25 @@ get_maxblock(struct drm_buddy *mm, return max_block; } -static struct drm_buddy_block * -alloc_from_freetree(struct drm_buddy *mm, +static struct gpu_buddy_block * +alloc_from_freetree(struct gpu_buddy *mm, unsigned int order, unsigned long flags) { - struct drm_buddy_block *block = NULL; + struct gpu_buddy_block 
*block = NULL; struct rb_root *root; - enum drm_buddy_free_tree tree; + enum gpu_buddy_free_tree tree; unsigned int tmp; int err; - tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ? - DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE; + tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ? + GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) { block = get_maxblock(mm, order, tree); if (block) /* Store the obtained block order */ - tmp = drm_buddy_block_order(block); + tmp = gpu_buddy_block_order(block); } else { for (tmp = order; tmp <= mm->max_order; ++tmp) { /* Get RB tree root for this order and tree */ @@ -768,8 +758,8 @@ alloc_from_freetree(struct drm_buddy *mm, if (!block) { /* Try allocating from the other tree */ - tree = (tree == DRM_BUDDY_CLEAR_TREE) ? - DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE; + tree = (tree == GPU_BUDDY_CLEAR_TREE) ? + GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE; for (tmp = order; tmp <= mm->max_order; ++tmp) { root = &mm->free_trees[tree][tmp]; @@ -782,7 +772,7 @@ alloc_from_freetree(struct drm_buddy *mm, return ERR_PTR(-ENOSPC); } - BUG_ON(!drm_buddy_block_is_free(block)); + BUG_ON(!gpu_buddy_block_is_free(block)); while (tmp != order) { err = split_block(mm, block); @@ -796,18 +786,18 @@ alloc_from_freetree(struct drm_buddy *mm, err_undo: if (tmp != order) - __drm_buddy_free(mm, block, false); + __gpu_buddy_free(mm, block, false); return ERR_PTR(err); } -static int __alloc_range(struct drm_buddy *mm, +static int __alloc_range(struct gpu_buddy *mm, struct list_head *dfs, u64 start, u64 size, struct list_head *blocks, u64 *total_allocated_on_err) { - struct drm_buddy_block *block; - struct drm_buddy_block *buddy; + struct gpu_buddy_block *block; + struct gpu_buddy_block *buddy; u64 total_allocated = 0; LIST_HEAD(allocated); u64 end; @@ -820,31 +810,31 @@ static int __alloc_range(struct drm_buddy *mm, u64 block_end; block = list_first_entry_or_null(dfs, - struct 
drm_buddy_block, + struct gpu_buddy_block, tmp_link); if (!block) break; list_del(&block->tmp_link); - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block) - 1; + block_start = gpu_buddy_block_offset(block); + block_end = block_start + gpu_buddy_block_size(mm, block) - 1; if (!overlaps(start, end, block_start, block_end)) continue; - if (drm_buddy_block_is_allocated(block)) { + if (gpu_buddy_block_is_allocated(block)) { err = -ENOSPC; goto err_free; } if (contains(start, end, block_start, block_end)) { - if (drm_buddy_block_is_free(block)) { + if (gpu_buddy_block_is_free(block)) { mark_allocated(mm, block); - total_allocated += drm_buddy_block_size(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); + total_allocated += gpu_buddy_block_size(mm, block); + mm->avail -= gpu_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail -= gpu_buddy_block_size(mm, block); list_add_tail(&block->link, &allocated); continue; } else if (!mm->clear_avail) { @@ -853,7 +843,7 @@ static int __alloc_range(struct drm_buddy *mm, } } - if (!drm_buddy_block_is_split(block)) { + if (!gpu_buddy_block_is_split(block)) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; @@ -880,22 +870,22 @@ err_undo: */ buddy = __get_buddy(block); if (buddy && - (drm_buddy_block_is_free(block) && - drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block, false); + (gpu_buddy_block_is_free(block) && + gpu_buddy_block_is_free(buddy))) + __gpu_buddy_free(mm, block, false); err_free: if (err == -ENOSPC && total_allocated_on_err) { list_splice_tail(&allocated, blocks); *total_allocated_on_err = total_allocated; } else { - drm_buddy_free_list_internal(mm, &allocated); + gpu_buddy_free_list_internal(mm, &allocated); } return err; } -static int __drm_buddy_alloc_range(struct drm_buddy *mm, +static int 
__gpu_buddy_alloc_range(struct gpu_buddy *mm, u64 start, u64 size, u64 *total_allocated_on_err, @@ -911,13 +901,13 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm, blocks, total_allocated_on_err); } -static int __alloc_contig_try_harder(struct drm_buddy *mm, +static int __alloc_contig_try_harder(struct gpu_buddy *mm, u64 size, u64 min_block_size, struct list_head *blocks) { u64 rhs_offset, lhs_offset, lhs_size, filled; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; unsigned int tree, order; LIST_HEAD(blocks_lhs); unsigned long pages; @@ -943,8 +933,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, block = rbtree_get_free_block(iter); /* Allocate blocks traversing RHS */ - rhs_offset = drm_buddy_block_offset(block); - err = __drm_buddy_alloc_range(mm, rhs_offset, size, + rhs_offset = gpu_buddy_block_offset(block); + err = __gpu_buddy_alloc_range(mm, rhs_offset, size, &filled, blocks); if (!err || err != -ENOSPC) return err; @@ -954,18 +944,18 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, lhs_size = round_up(lhs_size, min_block_size); /* Allocate blocks traversing LHS */ - lhs_offset = drm_buddy_block_offset(block) - lhs_size; - err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size, + lhs_offset = gpu_buddy_block_offset(block) - lhs_size; + err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size, NULL, &blocks_lhs); if (!err) { list_splice(&blocks_lhs, blocks); return 0; } else if (err != -ENOSPC) { - drm_buddy_free_list_internal(mm, blocks); + gpu_buddy_free_list_internal(mm, blocks); return err; } /* Free blocks for the next iteration */ - drm_buddy_free_list_internal(mm, blocks); + gpu_buddy_free_list_internal(mm, blocks); iter = rb_prev(iter); } @@ -975,9 +965,9 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, } /** - * drm_buddy_block_trim - free unused pages + * gpu_buddy_block_trim - free unused pages * - * @mm: DRM buddy manager + * @mm: GPU buddy manager * @start: start address to begin 
the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. @@ -993,13 +983,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * Returns: * 0 on success, error code on failure. */ -int drm_buddy_block_trim(struct drm_buddy *mm, +int gpu_buddy_block_trim(struct gpu_buddy *mm, u64 *start, u64 new_size, struct list_head *blocks) { - struct drm_buddy_block *parent; - struct drm_buddy_block *block; + struct gpu_buddy_block *parent; + struct gpu_buddy_block *block; u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; @@ -1009,22 +999,22 @@ int drm_buddy_block_trim(struct drm_buddy *mm, return -EINVAL; block = list_first_entry(blocks, - struct drm_buddy_block, + struct gpu_buddy_block, link); - block_start = drm_buddy_block_offset(block); - block_end = block_start + drm_buddy_block_size(mm, block); + block_start = gpu_buddy_block_offset(block); + block_end = block_start + gpu_buddy_block_size(mm, block); - if (WARN_ON(!drm_buddy_block_is_allocated(block))) + if (WARN_ON(!gpu_buddy_block_is_allocated(block))) return -EINVAL; - if (new_size > drm_buddy_block_size(mm, block)) + if (new_size > gpu_buddy_block_size(mm, block)) return -EINVAL; if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) return -EINVAL; - if (new_size == drm_buddy_block_size(mm, block)) + if (new_size == gpu_buddy_block_size(mm, block)) return 0; new_start = block_start; @@ -1043,9 +1033,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, list_del(&block->link); mark_free(mm, block); - mm->avail += drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail += drm_buddy_block_size(mm, block); + mm->avail += gpu_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail += gpu_buddy_block_size(mm, block); /* Prevent recursively freeing this node */ parent = block->parent; @@ -1055,26 +1045,26 @@ int drm_buddy_block_trim(struct drm_buddy *mm, err = __alloc_range(mm, &dfs, new_start, 
new_size, blocks, NULL); if (err) { mark_allocated(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); + mm->avail -= gpu_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail -= gpu_buddy_block_size(mm, block); list_add(&block->link, blocks); } block->parent = parent; return err; } -EXPORT_SYMBOL(drm_buddy_block_trim); +EXPORT_SYMBOL(gpu_buddy_block_trim); -static struct drm_buddy_block * -__drm_buddy_alloc_blocks(struct drm_buddy *mm, +static struct gpu_buddy_block * +__gpu_buddy_alloc_blocks(struct gpu_buddy *mm, u64 start, u64 end, unsigned int order, unsigned long flags) { - if (flags & DRM_BUDDY_RANGE_ALLOCATION) + if (flags & GPU_BUDDY_RANGE_ALLOCATION) /* Allocate traversing within the range */ - return __drm_buddy_alloc_range_bias(mm, start, end, + return __gpu_buddy_alloc_range_bias(mm, start, end, order, flags); else /* Allocate from freetree */ @@ -1082,15 +1072,15 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm, } /** - * drm_buddy_alloc_blocks - allocate power-of-two blocks + * gpu_buddy_alloc_blocks - allocate power-of-two blocks * - * @mm: DRM buddy manager to allocate from + * @mm: GPU buddy manager to allocate from * @start: start of the allowed range for this block * @end: end of the allowed range for this block * @size: size of the allocation in bytes * @min_block_size: alignment of the allocation * @blocks: output list head to add allocated blocks - * @flags: DRM_BUDDY_*_ALLOCATION flags + * @flags: GPU_BUDDY_*_ALLOCATION flags * * alloc_range_bias() called on range limitations, which traverses * the tree and returns the desired block. @@ -1101,13 +1091,13 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm, * Returns: * 0 on success, error code on failure. 
*/ -int drm_buddy_alloc_blocks(struct drm_buddy *mm, +int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, u64 start, u64 end, u64 size, u64 min_block_size, struct list_head *blocks, unsigned long flags) { - struct drm_buddy_block *block = NULL; + struct gpu_buddy_block *block = NULL; u64 original_size, original_min_size; unsigned int min_order, order; LIST_HEAD(allocated); @@ -1137,14 +1127,14 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, if (!IS_ALIGNED(start | end, min_block_size)) return -EINVAL; - return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks); } original_size = size; original_min_size = min_block_size; /* Roundup the size to power of 2 */ - if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) { + if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) { size = roundup_pow_of_two(size); min_block_size = size; /* Align size value to min_block_size */ @@ -1157,8 +1147,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, min_order = ilog2(min_block_size) - ilog2(mm->chunk_size); if (order > mm->max_order || size > mm->size) { - if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) && - !(flags & DRM_BUDDY_RANGE_ALLOCATION)) + if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) && + !(flags & GPU_BUDDY_RANGE_ALLOCATION)) return __alloc_contig_try_harder(mm, original_size, original_min_size, blocks); @@ -1171,7 +1161,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, BUG_ON(order < min_order); do { - block = __drm_buddy_alloc_blocks(mm, start, + block = __gpu_buddy_alloc_blocks(mm, start, end, order, flags); @@ -1182,7 +1172,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, /* Try allocation through force merge method */ if (mm->clear_avail && !__force_merge(mm, start, end, min_order)) { - block = __drm_buddy_alloc_blocks(mm, start, + block = __gpu_buddy_alloc_blocks(mm, start, end, min_order, flags); @@ -1196,8 +1186,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, * Try contiguous block allocation 
through * try harder method. */ - if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION && - !(flags & DRM_BUDDY_RANGE_ALLOCATION)) + if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION && + !(flags & GPU_BUDDY_RANGE_ALLOCATION)) return __alloc_contig_try_harder(mm, original_size, original_min_size, @@ -1208,9 +1198,9 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); mark_allocated(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - if (drm_buddy_block_is_clear(block)) - mm->clear_avail -= drm_buddy_block_size(mm, block); + mm->avail -= gpu_buddy_block_size(mm, block); + if (gpu_buddy_block_is_clear(block)) + mm->clear_avail -= gpu_buddy_block_size(mm, block); kmemleak_update_trace(block); list_add_tail(&block->link, &allocated); @@ -1221,7 +1211,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + if (!(flags & GPU_BUDDY_TRIM_DISABLE) && original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); @@ -1234,11 +1224,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, block = list_last_entry(&allocated, typeof(*block), link); list_move(&block->link, &temp); trim_list = &temp; - trim_size = drm_buddy_block_size(mm, block) - + trim_size = gpu_buddy_block_size(mm, block) - (size - original_size); } - drm_buddy_block_trim(mm, + gpu_buddy_block_trim(mm, NULL, trim_size, trim_list); @@ -1251,44 +1241,42 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return 0; err_free: - drm_buddy_free_list_internal(mm, &allocated); + gpu_buddy_free_list_internal(mm, &allocated); return err; } -EXPORT_SYMBOL(drm_buddy_alloc_blocks); +EXPORT_SYMBOL(gpu_buddy_alloc_blocks); /** - * drm_buddy_block_print - print block information + * gpu_buddy_block_print - print block information * - * @mm: DRM buddy manager - * @block: DRM buddy block - * @p: DRM printer to use + * @mm: GPU buddy manager + * @block: GPU buddy block */ -void drm_buddy_block_print(struct 
drm_buddy *mm, - struct drm_buddy_block *block, - struct drm_printer *p) +void gpu_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - u64 start = drm_buddy_block_offset(block); - u64 size = drm_buddy_block_size(mm, block); + u64 start = gpu_buddy_block_offset(block); + u64 size = gpu_buddy_block_size(mm, block); - drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); + pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size); } -EXPORT_SYMBOL(drm_buddy_block_print); +EXPORT_SYMBOL(gpu_buddy_block_print); /** - * drm_buddy_print - print allocator state + * gpu_buddy_print - print allocator state * - * @mm: DRM buddy manager - * @p: DRM printer to use + * @mm: GPU buddy manager + * @p: GPU printer to use */ -void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p) +void gpu_buddy_print(struct gpu_buddy *mm) { int order; - drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", - mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); + pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); for (order = mm->max_order; order >= 0; order--) { - struct drm_buddy_block *block, *tmp; + struct gpu_buddy_block *block, *tmp; struct rb_root *root; u64 count = 0, free; unsigned int tree; @@ -1297,40 +1285,38 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p) root = &mm->free_trees[tree][order]; rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { - BUG_ON(!drm_buddy_block_is_free(block)); + BUG_ON(!gpu_buddy_block_is_free(block)); count++; } } - drm_printf(p, "order-%2d ", order); - free = count * (mm->chunk_size << order); if (free < SZ_1M) - drm_printf(p, "free: %8llu KiB", free >> 10); + pr_info("order-%2d free: %8llu KiB, blocks: %llu\n", + order, free >> 10, count); else - drm_printf(p, "free: %8llu MiB", free >> 20); - - 
drm_printf(p, ", blocks: %llu\n", count); + pr_info("order-%2d free: %8llu MiB, blocks: %llu\n", + order, free >> 20, count); } } -EXPORT_SYMBOL(drm_buddy_print); +EXPORT_SYMBOL(gpu_buddy_print); -static void drm_buddy_module_exit(void) +static void gpu_buddy_module_exit(void) { kmem_cache_destroy(slab_blocks); } -static int __init drm_buddy_module_init(void) +static int __init gpu_buddy_module_init(void) { - slab_blocks = KMEM_CACHE(drm_buddy_block, 0); + slab_blocks = KMEM_CACHE(gpu_buddy_block, 0); if (!slab_blocks) return -ENOMEM; return 0; } -module_init(drm_buddy_module_init); -module_exit(drm_buddy_module_exit); +module_init(gpu_buddy_module_init); +module_exit(gpu_buddy_module_exit); -MODULE_DESCRIPTION("DRM Buddy Allocator"); +MODULE_DESCRIPTION("GPU Buddy Allocator"); MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 862ff4000969..758f2eb3d588 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -220,6 +220,7 @@ config DRM_GPUSVM config DRM_BUDDY tristate depends on DRM + select GPU_BUDDY help A page based buddy allocator diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 892859cfe95f..d0e37f8c2a46 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -114,7 +114,7 @@ drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \ obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o -obj-$(CONFIG_DRM_BUDDY) += ../buddy.o +obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o drm_dma_helper-y := drm_gem_dma_helper.o drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f582113d78b7..149f8f942eae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -5663,7 +5663,7 @@ int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_vram_mgr_resource *vres; struct 
ras_critical_region *region; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; int ret = 0; if (!bo || !bo->tbo.resource) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index be2e56ce1355..8908d9e08a30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -55,7 +55,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct list_head *head, *next; struct drm_mm_node *node; @@ -71,7 +71,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, head = &to_amdgpu_vram_mgr_resource(res)->blocks; block = list_first_entry_or_null(head, - struct drm_buddy_block, + struct gpu_buddy_block, link); if (!block) goto fallback; @@ -81,7 +81,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, next = block->link.next; if (next != head) - block = list_entry(next, struct drm_buddy_block, link); + block = list_entry(next, struct gpu_buddy_block, link); } cur->start = amdgpu_vram_mgr_block_start(block) + start; @@ -125,7 +125,7 @@ fallback: */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct drm_mm_node *node; struct list_head *next; @@ -146,7 +146,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) block = cur->node; next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); + block = list_entry(next, struct gpu_buddy_block, link); cur->node = block; cur->start = amdgpu_vram_mgr_block_start(block); @@ -175,7 +175,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) */ static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) { - struct drm_buddy_block *block; + struct 
gpu_buddy_block *block; switch (cur->mem_type) { case TTM_PL_VRAM: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9d934c07fa6b..cd94f6efb7cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -52,15 +53,15 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr) return container_of(mgr, struct amdgpu_device, mman.vram_mgr); } -static inline struct drm_buddy_block * +static inline struct gpu_buddy_block * amdgpu_vram_mgr_first_block(struct list_head *list) { - return list_first_entry_or_null(list, struct drm_buddy_block, link); + return list_first_entry_or_null(list, struct gpu_buddy_block, link); } static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; u64 start, size; block = amdgpu_vram_mgr_first_block(head); @@ -71,7 +72,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) start = amdgpu_vram_mgr_block_start(block); size = amdgpu_vram_mgr_block_size(block); - block = list_entry(block->link.next, struct drm_buddy_block, link); + block = list_entry(block->link.next, struct gpu_buddy_block, link); if (start + size != amdgpu_vram_mgr_block_start(block)) return false; } @@ -81,7 +82,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; u64 size = 0; list_for_each_entry(block, head, link) @@ -254,7 +255,7 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_buddy_block *block) + struct gpu_buddy_block 
*block) { u64 start = amdgpu_vram_mgr_block_start(block); u64 end = start + amdgpu_vram_mgr_block_size(block); @@ -279,7 +280,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; + struct gpu_buddy_block *block; u64 usage = 0; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) @@ -299,15 +300,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; uint64_t vis_usage; list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { - if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, + if (gpu_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, rsv->size, mm->chunk_size, &rsv->allocated, - DRM_BUDDY_RANGE_ALLOCATION)) + GPU_BUDDY_RANGE_ALLOCATION)) continue; block = amdgpu_vram_mgr_first_block(&rsv->allocated); @@ -403,7 +404,7 @@ int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, uint64_t address, struct amdgpu_vram_block_info *info) { struct amdgpu_vram_mgr_resource *vres; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; u64 start, size; int ret = -ENOENT; @@ -450,8 +451,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; unsigned int adjust_dcc_size = 0; - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &mgr->mm; + struct gpu_buddy_block *block; unsigned long pages_per_block; int r; @@ -493,17 +494,17 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man, INIT_LIST_HEAD(&vres->blocks); if (place->flags & TTM_PL_FLAG_TOPDOWN) - vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION; if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + vres->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION; if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) - vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + vres->flags |= GPU_BUDDY_CLEAR_ALLOCATION; if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ - vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + vres->flags |= GPU_BUDDY_RANGE_ALLOCATION; if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && adev->gmc.gmc_funcs->get_dcc_alignment) @@ -516,7 +517,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); remaining_size = (u64)dcc_size; - vres->flags |= DRM_BUDDY_TRIM_DISABLE; + vres->flags |= GPU_BUDDY_TRIM_DISABLE; } mutex_lock(&mgr->lock); @@ -536,7 +537,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, BUG_ON(min_block_size < mm->chunk_size); - r = drm_buddy_alloc_blocks(mm, fpfn, + r = gpu_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size, @@ -545,7 +546,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul && !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) { - vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION; + vres->flags &= ~GPU_BUDDY_CONTIGUOUS_ALLOCATION; pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT), tbo->page_alignment); @@ -566,7 +567,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, list_add_tail(&vres->vres_node, &mgr->allocated_vres_list); if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { - struct drm_buddy_block *dcc_block; + struct gpu_buddy_block *dcc_block; unsigned long dcc_start; u64 trim_start; @@ -576,7 +577,7 @@ static int 
amdgpu_vram_mgr_new(struct ttm_resource_manager *man, roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), adjust_dcc_size); trim_start = (u64)dcc_start; - drm_buddy_block_trim(mm, &trim_start, + gpu_buddy_block_trim(mm, &trim_start, (u64)vres->base.size, &vres->blocks); } @@ -614,7 +615,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks, 0); + gpu_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -637,8 +638,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &mgr->mm; + struct gpu_buddy_block *block; uint64_t vis_usage = 0; mutex_lock(&mgr->lock); @@ -649,7 +650,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - drm_buddy_free_list(mm, &vres->blocks, vres->flags); + gpu_buddy_free_list(mm, &vres->blocks, vres->flags); amdgpu_vram_mgr_do_reserve(man); mutex_unlock(&mgr->lock); @@ -688,7 +689,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_BUDDY blocks to export */ + /* Determine the number of GPU_BUDDY blocks to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -704,10 +705,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_BUDDY blocks to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_BUDDY block + * Walk down GPU_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first 
the GPU_BUDDY block * and the number of bytes from it. Access the following - * DRM_BUDDY block(s) if more buffer needs to exported + * GPU_BUDDY block(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -792,10 +793,10 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); - drm_buddy_reset_clear(mm, false); + gpu_buddy_reset_clear(mm, false); mutex_unlock(&mgr->lock); } @@ -815,7 +816,7 @@ static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man, size_t size) { struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; + struct gpu_buddy_block *block; /* Check each drm buddy block individually */ list_for_each_entry(block, &mgr->blocks, link) { @@ -848,7 +849,7 @@ static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man, size_t size) { struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; + struct gpu_buddy_block *block; /* Check each drm buddy block individually */ list_for_each_entry(block, &mgr->blocks, link) { @@ -877,7 +878,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv; drm_printf(printer, " vis usage:%llu\n", @@ -930,7 +931,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) mgr->default_page_size = PAGE_SIZE; man->func = &amdgpu_vram_mgr_func; - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + err = gpu_buddy_init(&mgr->mm, man->size, PAGE_SIZE); if (err) return err; @@ -965,11 +966,11 @@ void 
amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0); + gpu_buddy_free_list(&mgr->mm, &rsv->allocated, 0); kfree(rsv); } if (!adev->gmc.is_app_apu) - drm_buddy_fini(&mgr->mm); + gpu_buddy_fini(&mgr->mm); mutex_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 874779618056..429a21a2e9b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -28,7 +28,7 @@ struct amdgpu_vram_mgr { struct ttm_resource_manager manager; - struct drm_buddy mm; + struct gpu_buddy mm; /* protects access to buffer objects */ struct mutex lock; struct list_head reservations_pending; @@ -57,19 +57,19 @@ struct amdgpu_vram_mgr_resource { struct amdgpu_vres_task task; }; -static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) +static inline u64 amdgpu_vram_mgr_block_start(struct gpu_buddy_block *block) { - return drm_buddy_block_offset(block); + return gpu_buddy_block_offset(block); } -static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) +static inline u64 amdgpu_vram_mgr_block_size(struct gpu_buddy_block *block) { - return (u64)PAGE_SIZE << drm_buddy_block_order(block); + return (u64)PAGE_SIZE << gpu_buddy_block_order(block); } -static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +static inline bool amdgpu_vram_mgr_is_cleared(struct gpu_buddy_block *block) { - return drm_buddy_block_is_clear(block); + return gpu_buddy_block_is_clear(block); } static inline struct amdgpu_vram_mgr_resource * @@ -82,8 +82,8 @@ static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) { struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res); - WARN_ON(ares->flags & DRM_BUDDY_CLEARED); - ares->flags |= 
DRM_BUDDY_CLEARED; + WARN_ON(ares->flags & GPU_BUDDY_CLEARED); + ares->flags |= GPU_BUDDY_CLEARED; } int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c new file mode 100644 index 000000000000..841f3de5f307 --- /dev/null +++ b/drivers/gpu/drm/drm_buddy.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +/** + * drm_buddy_block_print - print block information + * + * @mm: DRM buddy manager + * @block: DRM buddy block + * @p: DRM printer to use + */ +void drm_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block, + struct drm_printer *p) +{ + u64 start = gpu_buddy_block_offset(block); + u64 size = gpu_buddy_block_size(mm, block); + + drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); +} +EXPORT_SYMBOL(drm_buddy_block_print); + +/** + * drm_buddy_print - print allocator state + * + * @mm: DRM buddy manager + * @p: DRM printer to use + */ +void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p) +{ + int order; + + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); + + for (order = mm->max_order; order >= 0; order--) { + struct gpu_buddy_block *block, *tmp; + struct rb_root *root; + u64 count = 0, free; + unsigned int tree; + + for_each_free_tree(tree) { + root = &mm->free_trees[tree][order]; + + rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { + BUG_ON(!gpu_buddy_block_is_free(block)); + count++; + } + } + + drm_printf(p, "order-%2d ", order); + + free = count * (mm->chunk_size << order); + if (free < SZ_1M) + drm_printf(p, "free: %8llu KiB", free >> 10); + else + drm_printf(p, "free: %8llu MiB", free >> 20); + + drm_printf(p, ", blocks: %llu\n", count); + } +} 
+EXPORT_SYMBOL(drm_buddy_print); + +MODULE_DESCRIPTION("DRM-specific GPU Buddy Allocator Print Helpers"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 30246f02bcfe..6a34dae13769 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -167,9 +167,9 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->size; const u32 max_segment = round_down(UINT_MAX, page_alignment); - struct drm_buddy *mm = bman_res->mm; + struct gpu_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct i915_refct_sgt *rsgt; struct scatterlist *sg; struct sg_table *st; @@ -202,8 +202,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, list_for_each_entry(block, blocks, link) { u64 block_size, offset; - block_size = min_t(u64, size, drm_buddy_block_size(mm, block)); - offset = drm_buddy_block_offset(block); + block_size = min_t(u64, size, gpu_buddy_block_size(mm, block)); + offset = gpu_buddy_block_offset(block); while (block_size) { u64 len; diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 6b256d95badd..c5ca90088705 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -16,7 +17,7 @@ struct i915_ttm_buddy_manager { struct ttm_resource_manager manager; - struct drm_buddy mm; + struct gpu_buddy mm; struct list_head reserved; struct mutex lock; unsigned long visible_size; @@ -38,7 +39,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, { struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); struct 
i915_ttm_buddy_resource *bman_res; - struct drm_buddy *mm = &bman->mm; + struct gpu_buddy *mm = &bman->mm; unsigned long n_pages, lpfn; u64 min_page_size; u64 size; @@ -57,13 +58,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, bman_res->mm = mm; if (place->flags & TTM_PL_FLAG_TOPDOWN) - bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + bman_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) - bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + bman_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION; if (place->fpfn || lpfn != man->size) - bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION; + bman_res->flags |= GPU_BUDDY_RANGE_ALLOCATION; GEM_BUG_ON(!bman_res->base.size); size = bman_res->base.size; @@ -89,7 +90,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, goto err_free_res; } - err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, (u64)lpfn << PAGE_SHIFT, (u64)n_pages << PAGE_SHIFT, min_page_size, @@ -101,15 +102,15 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, if (lpfn <= bman->visible_size) { bman_res->used_visible_size = PFN_UP(bman_res->base.size); } else { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; list_for_each_entry(block, &bman_res->blocks, link) { unsigned long start = - drm_buddy_block_offset(block) >> PAGE_SHIFT; + gpu_buddy_block_offset(block) >> PAGE_SHIFT; if (start < bman->visible_size) { unsigned long end = start + - (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT); bman_res->used_visible_size += min(end, bman->visible_size) - start; @@ -126,7 +127,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, return 0; err_free_blocks: - drm_buddy_free_list(mm, &bman_res->blocks, 0); + gpu_buddy_free_list(mm, &bman_res->blocks, 0); mutex_unlock(&bman->lock); err_free_res: 
ttm_resource_fini(man, &bman_res->base); @@ -141,7 +142,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); mutex_lock(&bman->lock); - drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0); + gpu_buddy_free_list(&bman->mm, &bman_res->blocks, 0); bman->visible_avail += bman_res->used_visible_size; mutex_unlock(&bman->lock); @@ -156,8 +157,8 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man, { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); - struct drm_buddy *mm = &bman->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &bman->mm; + struct gpu_buddy_block *block; if (!place->fpfn && !place->lpfn) return true; @@ -176,9 +177,9 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man, /* Check each drm buddy block individually */ list_for_each_entry(block, &bman_res->blocks, link) { unsigned long fpfn = - drm_buddy_block_offset(block) >> PAGE_SHIFT; + gpu_buddy_block_offset(block) >> PAGE_SHIFT; unsigned long lpfn = fpfn + - (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT); if (place->fpfn < lpfn && place->lpfn > fpfn) return true; @@ -194,8 +195,8 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man, { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); - struct drm_buddy *mm = &bman->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &bman->mm; + struct gpu_buddy_block *block; if (!place->fpfn && !place->lpfn) return true; @@ -209,9 +210,9 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man, /* Check each drm buddy block individually */ list_for_each_entry(block, &bman_res->blocks, link) { unsigned long fpfn = - drm_buddy_block_offset(block) >> 
PAGE_SHIFT; + gpu_buddy_block_offset(block) >> PAGE_SHIFT; unsigned long lpfn = fpfn + - (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT); if (fpfn < place->fpfn || lpfn > place->lpfn) return false; @@ -224,7 +225,7 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); - struct drm_buddy_block *block; + struct gpu_buddy_block *block; mutex_lock(&bman->lock); drm_printf(printer, "default_page_size: %lluKiB\n", @@ -293,7 +294,7 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev, if (!bman) return -ENOMEM; - err = drm_buddy_init(&bman->mm, size, chunk_size); + err = gpu_buddy_init(&bman->mm, size, chunk_size); if (err) goto err_free_bman; @@ -333,7 +334,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) { struct ttm_resource_manager *man = ttm_manager_type(bdev, type); struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); - struct drm_buddy *mm = &bman->mm; + struct gpu_buddy *mm = &bman->mm; int ret; ttm_resource_manager_set_used(man, false); @@ -345,8 +346,8 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) ttm_set_driver_manager(bdev, type, NULL); mutex_lock(&bman->lock); - drm_buddy_free_list(mm, &bman->reserved, 0); - drm_buddy_fini(mm); + gpu_buddy_free_list(mm, &bman->reserved, 0); + gpu_buddy_fini(mm); bman->visible_avail += bman->visible_reserved; WARN_ON_ONCE(bman->visible_avail != bman->visible_size); mutex_unlock(&bman->lock); @@ -371,15 +372,15 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, u64 start, u64 size) { struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); - struct drm_buddy *mm = &bman->mm; + struct gpu_buddy *mm = &bman->mm; unsigned long fpfn = start >> PAGE_SHIFT; unsigned long flags = 0; int ret; - flags |= DRM_BUDDY_RANGE_ALLOCATION; + flags |= GPU_BUDDY_RANGE_ALLOCATION; 
mutex_lock(&bman->lock); - ret = drm_buddy_alloc_blocks(mm, start, + ret = gpu_buddy_alloc_blocks(mm, start, start + size, size, mm->chunk_size, &bman->reserved, diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index d64620712830..1cff018c1689 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -13,7 +13,7 @@ struct ttm_device; struct ttm_resource_manager; -struct drm_buddy; +struct gpu_buddy; /** * struct i915_ttm_buddy_resource @@ -33,7 +33,7 @@ struct i915_ttm_buddy_resource { struct list_head blocks; unsigned long flags; unsigned long used_visible_size; - struct drm_buddy *mm; + struct gpu_buddy *mm; }; /** diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 7b856b5090f9..8307390943a2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include "../i915_selftest.h" @@ -371,7 +371,7 @@ static int igt_mock_splintered_region(void *arg) struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; - struct drm_buddy *mm; + struct gpu_buddy *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; @@ -447,8 +447,8 @@ static int igt_mock_max_segment(void *arg) struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; - struct drm_buddy_block *block; - struct drm_buddy *mm; + struct gpu_buddy_block *block; + struct gpu_buddy *mm; struct list_head *blocks; struct scatterlist *sg; I915_RND_STATE(prng); @@ -487,8 +487,8 @@ static int igt_mock_max_segment(void *arg) mm = res->mm; size = 0; list_for_each_entry(block, blocks, link) { - if (drm_buddy_block_size(mm, block) > size) - size = drm_buddy_block_size(mm, block); + if 
(gpu_buddy_block_size(mm, block) > size) + size = gpu_buddy_block_size(mm, block); } if (size < max_segment) { pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", @@ -527,14 +527,14 @@ static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = obj->mm.region; struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(obj->mm.res); - struct drm_buddy *mm = bman_res->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = bman_res->mm; + struct gpu_buddy_block *block; u64 total; total = 0; list_for_each_entry(block, &bman_res->blocks, link) { - u64 start = drm_buddy_block_offset(block); - u64 end = start + drm_buddy_block_size(mm, block); + u64 start = gpu_buddy_block_offset(block); + u64 end = start + gpu_buddy_block_size(mm, block); if (start < resource_size(&mr->io)) total += min_t(u64, end, resource_size(&mr->io)) - start; diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 6d95447a989d..e32f3c8d7b84 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -251,7 +251,7 @@ static void ttm_bo_validate_basic(struct kunit *test) NULL, &dummy_ttm_bo_destroy); KUNIT_EXPECT_EQ(test, err, 0); - snd_place = ttm_place_kunit_init(test, snd_mem, DRM_BUDDY_TOPDOWN_ALLOCATION); + snd_place = ttm_place_kunit_init(test, snd_mem, GPU_BUDDY_TOPDOWN_ALLOCATION); snd_placement = ttm_placement_kunit_init(test, snd_place, 1); err = ttm_bo_validate(bo, snd_placement, &ctx_val); @@ -263,7 +263,7 @@ static void ttm_bo_validate_basic(struct kunit *test) KUNIT_EXPECT_TRUE(test, ttm_tt_is_populated(bo->ttm)); KUNIT_EXPECT_EQ(test, bo->resource->mem_type, snd_mem); KUNIT_EXPECT_EQ(test, bo->resource->placement, - DRM_BUDDY_TOPDOWN_ALLOCATION); + GPU_BUDDY_TOPDOWN_ALLOCATION); ttm_bo_fini(bo); ttm_mock_manager_fini(priv->ttm_dev, snd_mem); diff --git 
a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c index dd395229e388..294d56d9067e 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c +++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c @@ -31,7 +31,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man, { struct ttm_mock_manager *manager = to_mock_mgr(man); struct ttm_mock_resource *mock_res; - struct drm_buddy *mm = &manager->mm; + struct gpu_buddy *mm = &manager->mm; u64 lpfn, fpfn, alloc_size; int err; @@ -47,14 +47,14 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man, INIT_LIST_HEAD(&mock_res->blocks); if (place->flags & TTM_PL_FLAG_TOPDOWN) - mock_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + mock_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) - mock_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + mock_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION; alloc_size = (uint64_t)mock_res->base.size; mutex_lock(&manager->lock); - err = drm_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size, + err = gpu_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size, manager->default_page_size, &mock_res->blocks, mock_res->flags); @@ -67,7 +67,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &mock_res->blocks, 0); + gpu_buddy_free_list(mm, &mock_res->blocks, 0); ttm_resource_fini(man, &mock_res->base); mutex_unlock(&manager->lock); @@ -79,10 +79,10 @@ static void ttm_mock_manager_free(struct ttm_resource_manager *man, { struct ttm_mock_manager *manager = to_mock_mgr(man); struct ttm_mock_resource *mock_res = to_mock_mgr_resource(res); - struct drm_buddy *mm = &manager->mm; + struct gpu_buddy *mm = &manager->mm; mutex_lock(&manager->lock); - drm_buddy_free_list(mm, &mock_res->blocks, 0); + gpu_buddy_free_list(mm, &mock_res->blocks, 0); mutex_unlock(&manager->lock); ttm_resource_fini(man, res); @@ -106,7 +106,7 @@ int 
ttm_mock_manager_init(struct ttm_device *bdev, u32 mem_type, u32 size) mutex_init(&manager->lock); - err = drm_buddy_init(&manager->mm, size, PAGE_SIZE); + err = gpu_buddy_init(&manager->mm, size, PAGE_SIZE); if (err) { kfree(manager); @@ -142,7 +142,7 @@ void ttm_mock_manager_fini(struct ttm_device *bdev, u32 mem_type) ttm_resource_manager_set_used(man, false); mutex_lock(&mock_man->lock); - drm_buddy_fini(&mock_man->mm); + gpu_buddy_fini(&mock_man->mm); mutex_unlock(&mock_man->lock); ttm_set_driver_manager(bdev, mem_type, NULL); diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h index 96ea8c9aae34..08710756fd8e 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h +++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h @@ -9,7 +9,7 @@ struct ttm_mock_manager { struct ttm_resource_manager man; - struct drm_buddy mm; + struct gpu_buddy mm; u64 default_page_size; /* protects allocations of mock buffer objects */ struct mutex lock; diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 4e00008b7081..5f4ab08c0686 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -58,7 +58,7 @@ struct xe_res_cursor { /** @dma_addr: Current element in a struct drm_pagemap_addr array */ const struct drm_pagemap_addr *dma_addr; /** @mm: Buddy allocator for VRAM cursor */ - struct drm_buddy *mm; + struct gpu_buddy *mm; /** * @dma_start: DMA start address for the current segment. 
* This may be different to @dma_addr.addr since elements in @@ -69,7 +69,7 @@ struct xe_res_cursor { u64 dma_seg_size; }; -static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res) +static struct gpu_buddy *xe_res_get_buddy(struct ttm_resource *res) { struct ttm_resource_manager *mgr; @@ -104,30 +104,30 @@ static inline void xe_res_first(struct ttm_resource *res, case XE_PL_STOLEN: case XE_PL_VRAM0: case XE_PL_VRAM1: { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct list_head *head, *next; - struct drm_buddy *mm = xe_res_get_buddy(res); + struct gpu_buddy *mm = xe_res_get_buddy(res); head = &to_xe_ttm_vram_mgr_resource(res)->blocks; block = list_first_entry_or_null(head, - struct drm_buddy_block, + struct gpu_buddy_block, link); if (!block) goto fallback; - while (start >= drm_buddy_block_size(mm, block)) { - start -= drm_buddy_block_size(mm, block); + while (start >= gpu_buddy_block_size(mm, block)) { + start -= gpu_buddy_block_size(mm, block); next = block->link.next; if (next != head) - block = list_entry(next, struct drm_buddy_block, + block = list_entry(next, struct gpu_buddy_block, link); } cur->mm = mm; - cur->start = drm_buddy_block_offset(block) + start; - cur->size = min(drm_buddy_block_size(mm, block) - start, + cur->start = gpu_buddy_block_offset(block) + start; + cur->size = min(gpu_buddy_block_size(mm, block) - start, size); cur->remaining = size; cur->node = block; @@ -259,7 +259,7 @@ static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr, */ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct list_head *next; u64 start; @@ -295,18 +295,18 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) block = cur->node; next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); + block = list_entry(next, struct gpu_buddy_block, link); - while (start >= 
drm_buddy_block_size(cur->mm, block)) { - start -= drm_buddy_block_size(cur->mm, block); + while (start >= gpu_buddy_block_size(cur->mm, block)) { + start -= gpu_buddy_block_size(cur->mm, block); next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); + block = list_entry(next, struct gpu_buddy_block, link); } - cur->start = drm_buddy_block_offset(block) + start; - cur->size = min(drm_buddy_block_size(cur->mm, block) - start, + cur->start = gpu_buddy_block_offset(block) + start; + cur->size = min(gpu_buddy_block_size(cur->mm, block) - start, cur->remaining); cur->node = block; break; diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 213f0334518a..cda3bf7e2418 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -747,7 +747,7 @@ static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset) return PHYS_PFN(offset + xpagemap->hpa_base); } -static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) +static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram) { return &vram->ttm.mm; } @@ -758,17 +758,17 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati struct xe_bo *bo = to_xe_bo(devmem_allocation); struct ttm_resource *res = bo->ttm.resource; struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; int j = 0; list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct drm_buddy *buddy = vram_to_buddy(vr); + struct gpu_buddy *buddy = vram_to_buddy(vr); u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap, - drm_buddy_block_offset(block)); + gpu_buddy_block_offset(block)); int i; - for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i) + for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i) pfn[j++] = block_pfn + i; } @@ -1033,7 +1033,7 @@ static int 
xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, struct dma_fence *pre_migrate_fence = NULL; struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; struct xe_validation_ctx vctx; struct list_head *blocks; struct drm_exec exec; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 6553a19f7cf2..d119217d566a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -16,16 +17,16 @@ #include "xe_ttm_vram_mgr.h" #include "xe_vram_types.h" -static inline struct drm_buddy_block * +static inline struct gpu_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) { - return list_first_entry_or_null(list, struct drm_buddy_block, link); + return list_first_entry_or_null(list, struct gpu_buddy_block, link); } -static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm, +static inline bool xe_is_vram_mgr_blocks_contiguous(struct gpu_buddy *mm, struct list_head *head) { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; u64 start, size; block = xe_ttm_vram_mgr_first_block(head); @@ -33,12 +34,12 @@ static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm, return false; while (head != block->link.next) { - start = drm_buddy_block_offset(block); - size = drm_buddy_block_size(mm, block); + start = gpu_buddy_block_offset(block); + size = gpu_buddy_block_size(mm, block); - block = list_entry(block->link.next, struct drm_buddy_block, + block = list_entry(block->link.next, struct gpu_buddy_block, link); - if (start + size != drm_buddy_block_offset(block)) + if (start + size != gpu_buddy_block_offset(block)) return false; } @@ -52,7 +53,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, { struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres; - struct 
drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; u64 size, min_page_size; unsigned long lpfn; int err; @@ -79,10 +80,10 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, INIT_LIST_HEAD(&vres->blocks); if (place->flags & TTM_PL_FLAG_TOPDOWN) - vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION; if (place->fpfn || lpfn != man->size >> PAGE_SHIFT) - vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + vres->flags |= GPU_BUDDY_RANGE_ALLOCATION; if (WARN_ON(!vres->base.size)) { err = -EINVAL; @@ -118,27 +119,27 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); } - err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, (u64)lpfn << PAGE_SHIFT, size, min_page_size, &vres->blocks, vres->flags); if (err) goto error_unlock; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) + if (!gpu_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } if (lpfn <= mgr->visible_size >> PAGE_SHIFT) { vres->used_visible_size = size; } else { - struct drm_buddy_block *block; + struct gpu_buddy_block *block; list_for_each_entry(block, &vres->blocks, link) { - u64 start = drm_buddy_block_offset(block); + u64 start = gpu_buddy_block_offset(block); if (start < mgr->visible_size) { - u64 end = start + drm_buddy_block_size(mm, block); + u64 end = start + gpu_buddy_block_size(mm, block); vres->used_visible_size += min(end, mgr->visible_size) - start; @@ -158,11 +159,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, * the object. 
*/ if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) { - struct drm_buddy_block *block = list_first_entry(&vres->blocks, + struct gpu_buddy_block *block = list_first_entry(&vres->blocks, typeof(*block), link); - vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT; + vres->base.start = gpu_buddy_block_offset(block) >> PAGE_SHIFT; } else { vres->base.start = XE_BO_INVALID_OFFSET; } @@ -184,10 +185,10 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); - drm_buddy_free_list(mm, &vres->blocks, 0); + gpu_buddy_free_list(mm, &vres->blocks, 0); mgr->visible_avail += vres->used_visible_size; mutex_unlock(&mgr->lock); @@ -200,7 +201,7 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; + struct gpu_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); drm_printf(printer, "default_page_size: %lluKiB\n", @@ -223,8 +224,8 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &mgr->mm; + struct gpu_buddy_block *block; if (!place->fpfn && !place->lpfn) return true; @@ -234,9 +235,9 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) { unsigned long fpfn = - drm_buddy_block_offset(block) >> PAGE_SHIFT; + gpu_buddy_block_offset(block) >> PAGE_SHIFT; unsigned long lpfn = fpfn + - (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT); if (place->fpfn < 
lpfn && place->lpfn > fpfn) return true; @@ -253,8 +254,8 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; + struct gpu_buddy *mm = &mgr->mm; + struct gpu_buddy_block *block; if (!place->fpfn && !place->lpfn) return true; @@ -264,9 +265,9 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) { unsigned long fpfn = - drm_buddy_block_offset(block) >> PAGE_SHIFT; + gpu_buddy_block_offset(block) >> PAGE_SHIFT; unsigned long lpfn = fpfn + - (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT); if (fpfn < place->fpfn || lpfn > place->lpfn) return false; @@ -296,7 +297,7 @@ static void xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg) WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size); - drm_buddy_fini(&mgr->mm); + gpu_buddy_fini(&mgr->mm); ttm_resource_manager_cleanup(&mgr->manager); @@ -327,7 +328,7 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, mgr->visible_avail = io_size; ttm_resource_manager_init(man, &xe->ttm, size); - err = drm_buddy_init(&mgr->mm, man->size, default_page_size); + err = gpu_buddy_init(&mgr->mm, man->size, default_page_size); if (err) return err; @@ -375,7 +376,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_BUDDY blocks to export */ + /* Determine the number of GPU_BUDDY blocks to export */ xe_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -392,10 +393,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, sg->length = 0; /* - * Walk down DRM_BUDDY blocks to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_BUDDY block + * Walk down 
GPU_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first the GPU_BUDDY block * and the number of bytes from it. Access the following - * DRM_BUDDY block(s) if more buffer needs to exported + * GPU_BUDDY block(s) if more buffer needs to exported */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index babeec5511d9..9106da056b49 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -18,7 +18,7 @@ struct xe_ttm_vram_mgr { /** @manager: Base TTM resource manager */ struct ttm_resource_manager manager; /** @mm: DRM buddy allocator which manages the VRAM */ - struct drm_buddy mm; + struct gpu_buddy mm; /** @visible_size: Proped size of the CPU visible portion */ u64 visible_size; /** @visible_avail: CPU visible portion still unallocated */ diff --git a/drivers/gpu/tests/Makefile b/drivers/gpu/tests/Makefile index 8e7654e87d82..4183e6e2de45 100644 --- a/drivers/gpu/tests/Makefile +++ b/drivers/gpu/tests/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 gpu_buddy_tests-y = gpu_buddy_test.o gpu_random.o -obj-$(CONFIG_DRM_KUNIT_TEST) += gpu_buddy_tests.o +obj-$(CONFIG_GPU_BUDDY_KUNIT_TEST) += gpu_buddy_tests.o diff --git a/drivers/gpu/tests/gpu_buddy_test.c b/drivers/gpu/tests/gpu_buddy_test.c index b905932da990..450e71deed90 100644 --- a/drivers/gpu/tests/gpu_buddy_test.c +++ b/drivers/gpu/tests/gpu_buddy_test.c @@ -21,9 +21,9 @@ static inline u64 get_size(int order, u64 chunk_size) return (1 << order) * chunk_size; } -static void drm_test_buddy_fragmentation_performance(struct kunit *test) +static void gpu_test_buddy_fragmentation_performance(struct kunit *test) { - struct drm_buddy_block *block, *tmp; + struct gpu_buddy_block *block, *tmp; int num_blocks, i, ret, count = 0; LIST_HEAD(allocated_blocks); unsigned long elapsed_ms; @@ -32,7 +32,7 @@ 
static void drm_test_buddy_fragmentation_performance(struct kunit *test) LIST_HEAD(clear_list); LIST_HEAD(dirty_list); LIST_HEAD(free_list); - struct drm_buddy mm; + struct gpu_buddy mm; u64 mm_size = SZ_4G; ktime_t start, end; @@ -47,7 +47,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test) * quickly the allocator can satisfy larger, aligned requests from a pool of * highly fragmented space. */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), "buddy_init failed\n"); num_blocks = mm_size / SZ_64K; @@ -55,7 +55,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test) start = ktime_get(); /* Allocate with maximum fragmentation - 8K blocks with 64K alignment */ for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, &allocated_blocks, 0), "buddy_alloc hit an error size=%u\n", SZ_8K); @@ -68,21 +68,21 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test) } /* Free with different flags to ensure no coalescing */ - drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty_list, 0); + gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &dirty_list, 0); for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, &test_blocks, 0), "buddy_alloc hit an error size=%u\n", SZ_64K); - drm_buddy_free_list(&mm, &test_blocks, 0); + gpu_buddy_free_list(&mm, &test_blocks, 0); end = ktime_get(); elapsed_ms = ktime_to_ms(ktime_sub(end, start)); kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms); - drm_buddy_fini(&mm); + gpu_buddy_fini(&mm); /* * Reverse free 
order under fragmentation @@ -96,13 +96,13 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test) * deallocation occurs in the opposite order of allocation, exposing the * cost difference between a linear freelist scan and an ordered tree lookup. */ - ret = drm_buddy_init(&mm, mm_size, SZ_4K); + ret = gpu_buddy_init(&mm, mm_size, SZ_4K); KUNIT_ASSERT_EQ(test, ret, 0); start = ktime_get(); /* Allocate maximum fragmentation */ for (i = 0; i < num_blocks; i++) - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, &allocated_blocks, 0), "buddy_alloc hit an error size=%u\n", SZ_8K); @@ -111,28 +111,28 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test) list_move_tail(&block->link, &free_list); count++; } - drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED); list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) list_move(&block->link, &reverse_list); - drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED); end = ktime_get(); elapsed_ms = ktime_to_ms(ktime_sub(end, start)); kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); - drm_buddy_fini(&mm); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_range_bias(struct kunit *test) +static void gpu_test_buddy_alloc_range_bias(struct kunit *test) { u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; - DRM_RND_STATE(prng, random_seed); + GPU_RND_STATE(prng, random_seed); unsigned int i, count, *order; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; unsigned long flags; - struct drm_buddy mm; + struct gpu_buddy mm; LIST_HEAD(allocated); bias_size = SZ_1M; @@ -142,11 +142,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) kunit_info(test, "mm_size=%u, 
ps=%u\n", mm_size, ps); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), "buddy_init failed\n"); count = mm_size / bias_size; - order = drm_random_order(count, &prng); + order = gpu_random_order(count, &prng); KUNIT_EXPECT_TRUE(test, order); /* @@ -166,79 +166,79 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) /* internal round_up too big */ KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, bias_size + ps, bias_size, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, bias_size, bias_size); /* size too big */ KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, bias_size + ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, bias_size + ps, ps); /* bias range too small for size */ KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start + ps, + gpu_buddy_alloc_blocks(&mm, bias_start + ps, bias_end, bias_size, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", bias_start + ps, bias_end, bias_size, ps); /* bias misaligned */ KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start + ps, + gpu_buddy_alloc_blocks(&mm, bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); /* single big page */ KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, 
bias_end, bias_size, bias_size, &tmp, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, bias_size, bias_size); - drm_buddy_free_list(&mm, &tmp, 0); + gpu_buddy_free_list(&mm, &tmp, 0); /* single page with internal round_up */ KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, ps, bias_size, &tmp, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, ps, bias_size); - drm_buddy_free_list(&mm, &tmp, 0); + gpu_buddy_free_list(&mm, &tmp, 0); /* random size within */ size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); if (size) KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, size, ps, &tmp, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, size, ps); bias_rem -= size; /* too big for current avail */ KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, bias_rem + ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, bias_rem + ps, ps); @@ -248,10 +248,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) size = max(size, ps); KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, size, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, size, ps); /* @@ -259,15 +259,15 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) * unallocated, and ideally not always on the bias 
* boundaries. */ - drm_buddy_free_list(&mm, &tmp, 0); + gpu_buddy_free_list(&mm, &tmp, 0); } else { list_splice_tail(&tmp, &allocated); } } kfree(order); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_fini(&mm); /* * Something more free-form. Idea is to pick a random starting bias @@ -278,7 +278,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) * allocated nodes in the middle of the address space. */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), "buddy_init failed\n"); bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); @@ -290,10 +290,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, size, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, size, ps); bias_rem -= size; @@ -319,24 +319,24 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) KUNIT_ASSERT_EQ(test, bias_start, 0); KUNIT_ASSERT_EQ(test, bias_end, mm_size); KUNIT_ASSERT_TRUE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc passed with bias(%x-%x), size=%u\n", bias_start, bias_end, ps); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_fini(&mm); /* - * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is + * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is * zero. 
This will validate the bias range allocation in scenarios like system boot * when no cleared blocks are available and exercise the fallback path too. The resulting * blocks should always be dirty. */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), "buddy_init failed\n"); bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); @@ -344,11 +344,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) bias_end = max(bias_end, bias_start + ps); bias_rem = bias_end - bias_start; - flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION; + flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION; size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, bias_start, + gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, size, ps, &allocated, flags), @@ -356,27 +356,27 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) bias_start, bias_end, size, ps); list_for_each_entry(block, &allocated, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); + KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_clear(struct kunit *test) +static void gpu_test_buddy_alloc_clear(struct kunit *test) { unsigned long n_pages, total, i = 0; const unsigned long ps = SZ_4K; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; const int max_order = 12; LIST_HEAD(allocated); - struct drm_buddy mm; + struct gpu_buddy mm; unsigned int order; u32 mm_size, size; LIST_HEAD(dirty); LIST_HEAD(clean); mm_size = SZ_4K << max_order; - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); KUNIT_EXPECT_EQ(test, mm.max_order, max_order); @@ 
-389,11 +389,11 @@ static void drm_test_buddy_alloc_clear(struct kunit *test) * is indeed all dirty pages and vice versa. Free it all again, * keeping the dirty/clear status. */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 5 * ps, ps, &allocated, - DRM_BUDDY_TOPDOWN_ALLOCATION), + GPU_BUDDY_TOPDOWN_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 5 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED); n_pages = 10; do { @@ -406,37 +406,37 @@ static void drm_test_buddy_alloc_clear(struct kunit *test) flags = 0; } else { list = &clean; - flags = DRM_BUDDY_CLEAR_ALLOCATION; + flags = GPU_BUDDY_CLEAR_ALLOCATION; } - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, flags), "buddy_alloc hit an error size=%lu\n", ps); } while (++i < n_pages); list_for_each_entry(block, &clean, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true); + KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true); list_for_each_entry(block, &dirty, link) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); + KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); /* * Trying to go over the clear limit for some allocation. * The allocation should never fail with reasonable page-size. 
*/ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 10 * ps, ps, &clean, - DRM_BUDDY_CLEAR_ALLOCATION), + GPU_BUDDY_CLEAR_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 10 * ps); - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &dirty, 0); + gpu_buddy_fini(&mm); - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); /* * Create a new mm. Intentionally fragment the address space by creating @@ -458,34 +458,34 @@ static void drm_test_buddy_alloc_clear(struct kunit *test) else list = &clean; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, 0), "buddy_alloc hit an error size=%lu\n", ps); } while (++i < n_pages); - drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED); - drm_buddy_free_list(&mm, &dirty, 0); + gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); + gpu_buddy_free_list(&mm, &dirty, 0); order = 1; do { size = SZ_4K << order; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, size, &allocated, - DRM_BUDDY_CLEAR_ALLOCATION), + GPU_BUDDY_CLEAR_ALLOCATION), "buddy_alloc hit an error size=%u\n", size); total = 0; list_for_each_entry(block, &allocated, link) { if (size != mm_size) - KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false); - total += drm_buddy_block_size(&mm, block); + KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); + total += gpu_buddy_block_size(&mm, block); } KUNIT_EXPECT_EQ(test, total, size); - drm_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_free_list(&mm, &allocated, 0); } while (++order <= max_order); - 
drm_buddy_fini(&mm); + gpu_buddy_fini(&mm); /* * Create a new mm with a non power-of-two size. Allocate a random size from each @@ -494,44 +494,44 @@ static void drm_test_buddy_alloc_clear(struct kunit *test) */ mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2)); - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); KUNIT_EXPECT_EQ(test, mm.max_order, max_order); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, 4 * ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 4 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, + gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED); + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order, 2 * ps, ps, &allocated, - DRM_BUDDY_CLEAR_ALLOCATION), + GPU_BUDDY_CLEAR_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size, + gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED); + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size, ps, ps, &allocated, - DRM_BUDDY_RANGE_ALLOCATION), + GPU_BUDDY_RANGE_ALLOCATION), "buddy_alloc hit an error size=%lu\n", ps); - drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_contiguous(struct kunit *test) +static void gpu_test_buddy_alloc_contiguous(struct kunit *test) { const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; unsigned long i, n_pages, total; - struct drm_buddy_block 
*block; - struct drm_buddy mm; + struct gpu_buddy_block *block; + struct gpu_buddy mm; LIST_HEAD(left); LIST_HEAD(middle); LIST_HEAD(right); LIST_HEAD(allocated); - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); /* * Idea is to fragment the address space by alternating block * allocations between three different lists; one for left, middle and * right. We can then free a list to simulate fragmentation. In - * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, + * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION, * including the try_harder path. */ @@ -548,66 +548,66 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) else list = &right; KUNIT_ASSERT_FALSE_MSG(test, - drm_buddy_alloc_blocks(&mm, 0, mm_size, + gpu_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, 0), "buddy_alloc hit an error size=%lu\n", ps); } while (++i < n_pages); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 3 * ps); - drm_buddy_free_list(&mm, &middle, 0); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + gpu_buddy_free_list(&mm, &middle, 0); + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 3 * ps); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &right, 0); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 
0, mm_size, + gpu_buddy_free_list(&mm, &right, 0); + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 3 * ps); /* * At this point we should have enough contiguous space for 2 blocks, * however they are never buddies (since we freed middle and right) so * will require the try_harder logic to find them. */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &left, 0); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + gpu_buddy_free_list(&mm, &left, 0); + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 3 * ps); total = 0; list_for_each_entry(block, &allocated, link) - total += drm_buddy_block_size(&mm, block); + total += gpu_buddy_block_size(&mm, block); KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_pathological(struct kunit *test) +static void gpu_test_buddy_alloc_pathological(struct kunit *test) { u64 mm_size, size, start = 0; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; const int max_order = 3; unsigned long flags = 0; int order, top; - struct drm_buddy mm; + struct gpu_buddy mm; LIST_HEAD(blocks); LIST_HEAD(holes); LIST_HEAD(tmp); @@ -620,7 +620,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) */ mm_size = SZ_4K << max_order; - KUNIT_ASSERT_FALSE_MSG(test, 
drm_buddy_init(&mm, mm_size, SZ_4K), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), "buddy_init failed\n"); KUNIT_EXPECT_EQ(test, mm.max_order, max_order); @@ -630,18 +630,18 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) block = list_first_entry_or_null(&blocks, typeof(*block), link); if (block) { list_del(&block->link); - drm_buddy_free_block(&mm, block); + gpu_buddy_free_block(&mm, block); } for (order = top; order--;) { size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM with order=%d, top=%d\n", order, top); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_move_tail(&block->link, &blocks); @@ -649,45 +649,45 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) /* There should be one final page for this sub-allocation */ size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM for hole\n"); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_move_tail(&block->link, &holes); size = get_size(top, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", top, max_order); } - drm_buddy_free_list(&mm, &holes, 0); + 
gpu_buddy_free_list(&mm, &holes, 0); /* Nothing larger than blocks of chunk_size now available */ for (order = 1; order <= max_order; order++) { size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc unexpectedly succeeded at order %d, it should be full!", order); } list_splice_tail(&holes, &blocks); - drm_buddy_free_list(&mm, &blocks, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &blocks, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_pessimistic(struct kunit *test) +static void gpu_test_buddy_alloc_pessimistic(struct kunit *test) { u64 mm_size, size, start = 0; - struct drm_buddy_block *block, *bn; + struct gpu_buddy_block *block, *bn; const unsigned int max_order = 16; unsigned long flags = 0; - struct drm_buddy mm; + struct gpu_buddy mm; unsigned int order; LIST_HEAD(blocks); LIST_HEAD(tmp); @@ -699,19 +699,19 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test) */ mm_size = SZ_4K << max_order; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), "buddy_init failed\n"); KUNIT_EXPECT_EQ(test, mm.max_order, max_order); for (order = 0; order < max_order; order++) { size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM with order=%d\n", order); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_move_tail(&block->link, &blocks); @@ -719,11 +719,11 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test) /* And now the last 
remaining block available */ size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM on final alloc\n"); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_move_tail(&block->link, &blocks); @@ -731,58 +731,58 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test) /* Should be completely full! */ for (order = max_order; order--;) { size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc unexpectedly succeeded, it should be full!"); } block = list_last_entry(&blocks, typeof(*block), link); list_del(&block->link); - drm_buddy_free_block(&mm, block); + gpu_buddy_free_block(&mm, block); /* As we free in increasing size, we make available larger blocks */ order = 1; list_for_each_entry_safe(block, bn, &blocks, link) { list_del(&block->link); - drm_buddy_free_block(&mm, block); + gpu_buddy_free_block(&mm, block); size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM with order=%d\n", order); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_del(&block->link); - drm_buddy_free_block(&mm, block); + gpu_buddy_free_block(&mm, block); order++; } /* To confirm, now the whole mm should be available */ size = 
get_size(max_order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc (realloc) hit -ENOMEM with order=%d\n", max_order); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_del(&block->link); - drm_buddy_free_block(&mm, block); - drm_buddy_free_list(&mm, &blocks, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_block(&mm, block); + gpu_buddy_free_list(&mm, &blocks, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_optimistic(struct kunit *test) +static void gpu_test_buddy_alloc_optimistic(struct kunit *test) { u64 mm_size, size, start = 0; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; unsigned long flags = 0; const int max_order = 16; - struct drm_buddy mm; + struct gpu_buddy mm; LIST_HEAD(blocks); LIST_HEAD(tmp); int order; @@ -794,19 +794,19 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test) mm_size = SZ_4K * ((1 << (max_order + 1)) - 1); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), "buddy_init failed\n"); KUNIT_EXPECT_EQ(test, mm.max_order, max_order); for (order = 0; order <= max_order; order++) { size = get_size(order, mm.chunk_size); - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc hit -ENOMEM with order=%d\n", order); - block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link); + block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); list_move_tail(&block->link, &blocks); @@ 
-814,115 +814,115 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test) /* Should be completely full! */ size = get_size(0, mm.chunk_size); - KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size, + KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, size, size, &tmp, flags), "buddy_alloc unexpectedly succeeded, it should be full!"); - drm_buddy_free_list(&mm, &blocks, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &blocks, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_limit(struct kunit *test) +static void gpu_test_buddy_alloc_limit(struct kunit *test) { u64 size = U64_MAX, start = 0; - struct drm_buddy_block *block; + struct gpu_buddy_block *block; unsigned long flags = 0; LIST_HEAD(allocated); - struct drm_buddy mm; + struct gpu_buddy mm; - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K)); + KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K)); - KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER, + KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER, "mm.max_order(%d) != %d\n", mm.max_order, - DRM_BUDDY_MAX_ORDER); + GPU_BUDDY_MAX_ORDER); size = mm.chunk_size << mm.max_order; - KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size, + KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size, mm.chunk_size, &allocated, flags)); - block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link); + block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link); KUNIT_EXPECT_TRUE(test, block); - KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order, + KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order, "block order(%d) != %d\n", - drm_buddy_block_order(block), mm.max_order); + gpu_buddy_block_order(block), mm.max_order); - KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block), + KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block), BIT_ULL(mm.max_order) * 
mm.chunk_size, "block size(%llu) != %llu\n", - drm_buddy_block_size(&mm, block), + gpu_buddy_block_size(&mm, block), BIT_ULL(mm.max_order) * mm.chunk_size); - drm_buddy_free_list(&mm, &allocated, 0); - drm_buddy_fini(&mm); + gpu_buddy_free_list(&mm, &allocated, 0); + gpu_buddy_fini(&mm); } -static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test) +static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test) { u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G; - struct drm_buddy mm; + struct gpu_buddy mm; LIST_HEAD(blocks); int err; - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K), + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), "buddy_init failed\n"); /* CONTIGUOUS allocation should succeed via try_harder fallback */ - KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size, + KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, - DRM_BUDDY_CONTIGUOUS_ALLOCATION), + GPU_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc hit an error size=%llu\n", size); - drm_buddy_free_list(&mm, &blocks, 0); + gpu_buddy_free_list(&mm, &blocks, 0); /* Non-CONTIGUOUS with large min_block_size should return -EINVAL */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0); + err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0); KUNIT_EXPECT_EQ(test, err, -EINVAL); /* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, - DRM_BUDDY_RANGE_ALLOCATION); + err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, + GPU_BUDDY_RANGE_ALLOCATION); KUNIT_EXPECT_EQ(test, err, -EINVAL); /* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */ - err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, - DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION); + 
err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, + GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION); KUNIT_EXPECT_EQ(test, err, -EINVAL); - drm_buddy_fini(&mm); + gpu_buddy_fini(&mm); } -static int drm_buddy_suite_init(struct kunit_suite *suite) +static int gpu_buddy_suite_init(struct kunit_suite *suite) { while (!random_seed) random_seed = get_random_u32(); - kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n", random_seed); return 0; } -static struct kunit_case drm_buddy_tests[] = { - KUNIT_CASE(drm_test_buddy_alloc_limit), - KUNIT_CASE(drm_test_buddy_alloc_optimistic), - KUNIT_CASE(drm_test_buddy_alloc_pessimistic), - KUNIT_CASE(drm_test_buddy_alloc_pathological), - KUNIT_CASE(drm_test_buddy_alloc_contiguous), - KUNIT_CASE(drm_test_buddy_alloc_clear), - KUNIT_CASE(drm_test_buddy_alloc_range_bias), - KUNIT_CASE(drm_test_buddy_fragmentation_performance), - KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order), +static struct kunit_case gpu_buddy_tests[] = { + KUNIT_CASE(gpu_test_buddy_alloc_limit), + KUNIT_CASE(gpu_test_buddy_alloc_optimistic), + KUNIT_CASE(gpu_test_buddy_alloc_pessimistic), + KUNIT_CASE(gpu_test_buddy_alloc_pathological), + KUNIT_CASE(gpu_test_buddy_alloc_contiguous), + KUNIT_CASE(gpu_test_buddy_alloc_clear), + KUNIT_CASE(gpu_test_buddy_alloc_range_bias), + KUNIT_CASE(gpu_test_buddy_fragmentation_performance), + KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order), {} }; -static struct kunit_suite drm_buddy_test_suite = { - .name = "drm_buddy", - .suite_init = drm_buddy_suite_init, - .test_cases = drm_buddy_tests, +static struct kunit_suite gpu_buddy_test_suite = { + .name = "gpu_buddy", + .suite_init = gpu_buddy_suite_init, + .test_cases = gpu_buddy_tests, }; -kunit_test_suite(drm_buddy_test_suite); +kunit_test_suite(gpu_buddy_test_suite); MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Kunit test for drm_buddy 
functions"); +MODULE_DESCRIPTION("Kunit test for gpu_buddy functions"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/tests/gpu_random.c b/drivers/gpu/tests/gpu_random.c index ddd1f594b5d5..6356372f7e52 100644 --- a/drivers/gpu/tests/gpu_random.c +++ b/drivers/gpu/tests/gpu_random.c @@ -8,26 +8,26 @@ #include "gpu_random.h" -u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +u32 gpu_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) { return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); } -EXPORT_SYMBOL(drm_prandom_u32_max_state); +EXPORT_SYMBOL(gpu_prandom_u32_max_state); -void drm_random_reorder(unsigned int *order, unsigned int count, +void gpu_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state) { unsigned int i, j; for (i = 0; i < count; ++i) { BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); - j = drm_prandom_u32_max_state(count, state); + j = gpu_prandom_u32_max_state(count, state); swap(order[i], order[j]); } } -EXPORT_SYMBOL(drm_random_reorder); +EXPORT_SYMBOL(gpu_random_reorder); -unsigned int *drm_random_order(unsigned int count, struct rnd_state *state) +unsigned int *gpu_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; @@ -38,7 +38,7 @@ unsigned int *drm_random_order(unsigned int count, struct rnd_state *state) for (i = 0; i < count; i++) order[i] = i; - drm_random_reorder(order, count, state); + gpu_random_reorder(order, count, state); return order; } -EXPORT_SYMBOL(drm_random_order); +EXPORT_SYMBOL(gpu_random_order); diff --git a/drivers/gpu/tests/gpu_random.h b/drivers/gpu/tests/gpu_random.h index 9f827260a89d..b68cf3448264 100644 --- a/drivers/gpu/tests/gpu_random.h +++ b/drivers/gpu/tests/gpu_random.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DRM_RANDOM_H__ -#define __DRM_RANDOM_H__ +#ifndef __GPU_RANDOM_H__ +#define __GPU_RANDOM_H__ /* This is a temporary home for a couple of utility functions that should * be transposed to 
lib/ at the earliest convenience. @@ -8,21 +8,21 @@ #include -#define DRM_RND_STATE_INITIALIZER(seed__) ({ \ +#define GPU_RND_STATE_INITIALIZER(seed__) ({ \ struct rnd_state state__; \ prandom_seed_state(&state__, (seed__)); \ state__; \ }) -#define DRM_RND_STATE(name__, seed__) \ - struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__) +#define GPU_RND_STATE(name__, seed__) \ + struct rnd_state name__ = GPU_RND_STATE_INITIALIZER(seed__) -unsigned int *drm_random_order(unsigned int count, +unsigned int *gpu_random_order(unsigned int count, struct rnd_state *state); -void drm_random_reorder(unsigned int *order, +void gpu_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state); -u32 drm_prandom_u32_max_state(u32 ep_ro, +u32 gpu_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state); -#endif /* !__DRM_RANDOM_H__ */ +#endif /* !__GPU_RANDOM_H__ */ diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index d51777df12d1..0adb1e2fa533 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -37,6 +37,7 @@ source "drivers/char/agp/Kconfig" source "drivers/gpu/vga/Kconfig" +source "drivers/gpu/Kconfig" source "drivers/gpu/host1x/Kconfig" source "drivers/gpu/ipu-v3/Kconfig" source "drivers/gpu/nova-core/Kconfig" diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h new file mode 100644 index 000000000000..3054369bebff --- /dev/null +++ b/include/drm/drm_buddy.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __DRM_BUDDY_H__ +#define __DRM_BUDDY_H__ + +#include + +struct drm_printer; + +/* DRM-specific GPU Buddy Allocator print helpers */ +void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p); +void drm_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block, + struct drm_printer *p); +#endif diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h index b909fa8f810a..07ac65db6d2e 100644 --- a/include/linux/gpu_buddy.h 
+++ b/include/linux/gpu_buddy.h @@ -3,8 +3,8 @@ * Copyright © 2021 Intel Corporation */ -#ifndef __DRM_BUDDY_H__ -#define __DRM_BUDDY_H__ +#ifndef __GPU_BUDDY_H__ +#define __GPU_BUDDY_H__ #include #include @@ -12,38 +12,45 @@ #include #include -struct drm_printer; - -#define DRM_BUDDY_RANGE_ALLOCATION BIT(0) -#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) -#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) -#define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) -#define DRM_BUDDY_CLEARED BIT(4) -#define DRM_BUDDY_TRIM_DISABLE BIT(5) - -struct drm_buddy_block { -#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) -#define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) -#define DRM_BUDDY_ALLOCATED (1 << 10) -#define DRM_BUDDY_FREE (2 << 10) -#define DRM_BUDDY_SPLIT (3 << 10) -#define DRM_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) +#define GPU_BUDDY_RANGE_ALLOCATION BIT(0) +#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) +#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) +#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) +#define GPU_BUDDY_CLEARED BIT(4) +#define GPU_BUDDY_TRIM_DISABLE BIT(5) + +enum gpu_buddy_free_tree { + GPU_BUDDY_CLEAR_TREE = 0, + GPU_BUDDY_DIRTY_TREE, + GPU_BUDDY_MAX_FREE_TREES, +}; + +#define for_each_free_tree(tree) \ + for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) + +struct gpu_buddy_block { +#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define GPU_BUDDY_ALLOCATED (1 << 10) +#define GPU_BUDDY_FREE (2 << 10) +#define GPU_BUDDY_SPLIT (3 << 10) +#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) /* Free to be used, if needed in the future */ -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) -#define DRM_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) +#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) +#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) u64 header; - struct drm_buddy_block *left; - struct drm_buddy_block *right; - struct drm_buddy_block *parent; + struct gpu_buddy_block *left; + struct gpu_buddy_block *right; + 
struct gpu_buddy_block *parent; void *private; /* owned by creator */ /* - * While the block is allocated by the user through drm_buddy_alloc*, + * While the block is allocated by the user through gpu_buddy_alloc*, * the user has ownership of the link, for example to maintain within * a list, if so desired. As soon as the block is freed with - * drm_buddy_free* ownership is given back to the mm. + * gpu_buddy_free* ownership is given back to the mm. */ union { struct rb_node rb; @@ -54,15 +61,15 @@ struct drm_buddy_block { }; /* Order-zero must be at least SZ_4K */ -#define DRM_BUDDY_MAX_ORDER (63 - 12) +#define GPU_BUDDY_MAX_ORDER (63 - 12) /* * Binary Buddy System. * * Locking should be handled by the user, a simple mutex around - * drm_buddy_alloc* and drm_buddy_free* should suffice. + * gpu_buddy_alloc* and gpu_buddy_free* should suffice. */ -struct drm_buddy { +struct gpu_buddy { /* Maintain a free list for each order. */ struct rb_root **free_trees; @@ -73,7 +80,7 @@ struct drm_buddy { * block. Nodes are either allocated or free, in which case they will * also exist on the respective free list. 
*/ - struct drm_buddy_block **roots; + struct gpu_buddy_block **roots; /* * Anything from here is public, and remains static for the lifetime of @@ -90,82 +97,81 @@ struct drm_buddy { }; static inline u64 -drm_buddy_block_offset(const struct drm_buddy_block *block) +gpu_buddy_block_offset(const struct gpu_buddy_block *block) { - return block->header & DRM_BUDDY_HEADER_OFFSET; + return block->header & GPU_BUDDY_HEADER_OFFSET; } static inline unsigned int -drm_buddy_block_order(struct drm_buddy_block *block) +gpu_buddy_block_order(struct gpu_buddy_block *block) { - return block->header & DRM_BUDDY_HEADER_ORDER; + return block->header & GPU_BUDDY_HEADER_ORDER; } static inline unsigned int -drm_buddy_block_state(struct drm_buddy_block *block) +gpu_buddy_block_state(struct gpu_buddy_block *block) { - return block->header & DRM_BUDDY_HEADER_STATE; + return block->header & GPU_BUDDY_HEADER_STATE; } static inline bool -drm_buddy_block_is_allocated(struct drm_buddy_block *block) +gpu_buddy_block_is_allocated(struct gpu_buddy_block *block) { - return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED; + return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED; } static inline bool -drm_buddy_block_is_clear(struct drm_buddy_block *block) +gpu_buddy_block_is_clear(struct gpu_buddy_block *block) { - return block->header & DRM_BUDDY_HEADER_CLEAR; + return block->header & GPU_BUDDY_HEADER_CLEAR; } static inline bool -drm_buddy_block_is_free(struct drm_buddy_block *block) +gpu_buddy_block_is_free(struct gpu_buddy_block *block) { - return drm_buddy_block_state(block) == DRM_BUDDY_FREE; + return gpu_buddy_block_state(block) == GPU_BUDDY_FREE; } static inline bool -drm_buddy_block_is_split(struct drm_buddy_block *block) +gpu_buddy_block_is_split(struct gpu_buddy_block *block) { - return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT; + return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT; } static inline u64 -drm_buddy_block_size(struct drm_buddy *mm, - struct drm_buddy_block 
*block) +gpu_buddy_block_size(struct gpu_buddy *mm, + struct gpu_buddy_block *block) { - return mm->chunk_size << drm_buddy_block_order(block); + return mm->chunk_size << gpu_buddy_block_order(block); } -int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); +int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size); -void drm_buddy_fini(struct drm_buddy *mm); +void gpu_buddy_fini(struct gpu_buddy *mm); -struct drm_buddy_block * -drm_get_buddy(struct drm_buddy_block *block); +struct gpu_buddy_block * +gpu_get_buddy(struct gpu_buddy_block *block); -int drm_buddy_alloc_blocks(struct drm_buddy *mm, +int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, u64 start, u64 end, u64 size, u64 min_page_size, struct list_head *blocks, unsigned long flags); -int drm_buddy_block_trim(struct drm_buddy *mm, +int gpu_buddy_block_trim(struct gpu_buddy *mm, u64 *start, u64 new_size, struct list_head *blocks); -void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear); +void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear); -void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); +void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block); -void drm_buddy_free_list(struct drm_buddy *mm, +void gpu_buddy_free_list(struct gpu_buddy *mm, struct list_head *objects, unsigned int flags); -void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p); -void drm_buddy_block_print(struct drm_buddy *mm, - struct drm_buddy_block *block, - struct drm_printer *p); +void gpu_buddy_print(struct gpu_buddy *mm); +void gpu_buddy_block_print(struct gpu_buddy *mm, + struct gpu_buddy_block *block); #endif -- cgit v1.2.3 From 6d438685340df6ac8570326aaa51c3603a2fe25c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 5 Feb 2026 15:10:44 +0100 Subject: drm/fbdev-emulation: Remove empty placeholders Only DRM clients for fbdev emulation invoke fbdev helpers. 
Hence remove the empty placeholders for non-fbdev builds, as they are unused. Signed-off-by: Thomas Zimmermann Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20260205141142.412048-1-tzimmermann@suse.de --- include/drm/drm_fb_helper.h | 105 -------------------------------------------- 1 file changed, 105 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 05cca77b7249..15274b8a1d97 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -271,111 +271,6 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); -#else -static inline void drm_fb_helper_prepare(struct drm_device *dev, - struct drm_fb_helper *helper, - unsigned int preferred_bpp, - const struct drm_fb_helper_funcs *funcs) -{ -} - -static inline void drm_fb_helper_unprepare(struct drm_fb_helper *fb_helper) -{ -} - -static inline int drm_fb_helper_init(struct drm_device *dev, - struct drm_fb_helper *helper) -{ - /* So drivers can use it to free the struct */ - helper->dev = dev; - dev->fb_helper = helper; - - return 0; -} - -static inline void drm_fb_helper_fini(struct drm_fb_helper *helper) -{ - if (helper && helper->dev) - helper->dev->fb_helper = NULL; -} - -static inline int drm_fb_helper_blank(int blank, struct fb_info *info) -{ - return 0; -} - -static inline int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - return 0; -} - -static inline int drm_fb_helper_set_par(struct fb_info *info) -{ - return 0; -} - -static inline int drm_fb_helper_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - return 0; -} - -static inline int -drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) -{ - return 0; -} - -static inline void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper) -{ -} - 
-static inline void -drm_fb_helper_fill_info(struct fb_info *info, - struct drm_fb_helper *fb_helper, - struct drm_fb_helper_surface_size *sizes) -{ -} - -static inline int drm_fb_helper_setcmap(struct fb_cmap *cmap, - struct fb_info *info) -{ - return 0; -} - -static inline int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, - unsigned long arg) -{ - return 0; -} - -#ifdef CONFIG_FB_DEFERRED_IO -static inline void drm_fb_helper_deferred_io(struct fb_info *info, - struct list_head *pagelist) -{ -} -#endif - -static inline void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, - bool suspend) -{ -} - -static inline void -drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper, bool suspend) -{ -} - -static inline int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) -{ - return 0; -} - -static inline int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper) -{ - return 0; -} #endif #endif -- cgit v1.2.3 From 62918542b7bf08860a60ebbde7654486e0ac0776 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 16 Jun 2025 16:59:52 +0100 Subject: dma-fence: Fix sparse warnings due __rcu annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __rcu annotations on the return types from dma_fence_driver_name() and dma_fence_timeline_name() cause sparse to complain because both the constant signaled strings, and the strings returned by the dma_fence_ops are not __rcu annotated. For a simple fix it is easiest to cast them with __rcu added and undo the smarts from the tracepoints side of things. There is no functional change since the rest is left in place. Later we can consider changing the dma_fence_ops return types too, and handle all the individual drivers which define them. 
Signed-off-by: Tvrtko Ursulin Fixes: 506aa8b02a8d ("dma-fence: Add safe access helpers and document the rules") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506162214.1eA69hLe-lkp@intel.com/ Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250616155952.24259-1-tvrtko.ursulin@igalia.com Signed-off-by: Christian König --- drivers/dma-buf/dma-fence.c | 8 ++++---- include/trace/events/dma_fence.h | 35 +++++------------------------------ 2 files changed, 9 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index c9a036b0d592..e05beae6e407 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -1133,9 +1133,9 @@ const char __rcu *dma_fence_driver_name(struct dma_fence *fence) "RCU protection is required for safe access to returned string"); if (!dma_fence_test_signaled_flag(fence)) - return fence->ops->get_driver_name(fence); + return (const char __rcu *)fence->ops->get_driver_name(fence); else - return "detached-driver"; + return (const char __rcu *)"detached-driver"; } EXPORT_SYMBOL(dma_fence_driver_name); @@ -1165,8 +1165,8 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) "RCU protection is required for safe access to returned string"); if (!dma_fence_test_signaled_flag(fence)) - return fence->ops->get_timeline_name(fence); + return (const char __rcu *)fence->ops->get_timeline_name(fence); else - return "signaled-timeline"; + return (const char __rcu *)"signaled-timeline"; } EXPORT_SYMBOL(dma_fence_timeline_name); diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 4814a65b68dc..3abba45c0601 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -9,37 +9,12 @@ struct dma_fence; -DECLARE_EVENT_CLASS(dma_fence, - - TP_PROTO(struct dma_fence *fence), - - TP_ARGS(fence), - - TP_STRUCT__entry( - __string(driver, dma_fence_driver_name(fence)) - 
__string(timeline, dma_fence_timeline_name(fence)) - __field(unsigned int, context) - __field(unsigned int, seqno) - ), - - TP_fast_assign( - __assign_str(driver); - __assign_str(timeline); - __entry->context = fence->context; - __entry->seqno = fence->seqno; - ), - - TP_printk("driver=%s timeline=%s context=%u seqno=%u", - __get_str(driver), __get_str(timeline), __entry->context, - __entry->seqno) -); - /* * Safe only for call sites which are guaranteed to not race with fence * signaling,holding the fence->lock and having checked for not signaled, or the * signaling path itself. */ -DECLARE_EVENT_CLASS(dma_fence_unsignaled, +DECLARE_EVENT_CLASS(dma_fence, TP_PROTO(struct dma_fence *fence), @@ -64,14 +39,14 @@ DECLARE_EVENT_CLASS(dma_fence_unsignaled, __entry->seqno) ); -DEFINE_EVENT(dma_fence_unsignaled, dma_fence_emit, +DEFINE_EVENT(dma_fence, dma_fence_emit, TP_PROTO(struct dma_fence *fence), TP_ARGS(fence) ); -DEFINE_EVENT(dma_fence_unsignaled, dma_fence_init, +DEFINE_EVENT(dma_fence, dma_fence_init, TP_PROTO(struct dma_fence *fence), @@ -85,14 +60,14 @@ DEFINE_EVENT(dma_fence, dma_fence_destroy, TP_ARGS(fence) ); -DEFINE_EVENT(dma_fence_unsignaled, dma_fence_enable_signal, +DEFINE_EVENT(dma_fence, dma_fence_enable_signal, TP_PROTO(struct dma_fence *fence), TP_ARGS(fence) ); -DEFINE_EVENT(dma_fence_unsignaled, dma_fence_signaled, +DEFINE_EVENT(dma_fence, dma_fence_signaled, TP_PROTO(struct dma_fence *fence), -- cgit v1.2.3 From 24a4241995ab7456c6751e0bd63382a95e70757f Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Mon, 2 Feb 2026 15:11:54 +0530 Subject: drm/colorop: Add destroy helper for colorop objects Add a helper that performs common cleanup and frees the associated object. This can be used by drivers if they do not require any driver-specific teardown. 
v2: - Add function documentation only before definition (Jani) Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Suraj Kandpal Reviewed-by: Uma Shankar Reviewed-by: Alex Hung Acked-by: Jani Nikula Signed-off-by: Suraj Kandpal Link: https://patch.msgid.link/20260202094202.2871478-2-chaitanya.kumar.borah@intel.com --- drivers/gpu/drm/drm_colorop.c | 15 +++++++++++++++ include/drm/drm_colorop.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index 44eb823585d2..c226870fde9e 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -178,6 +178,21 @@ void drm_colorop_cleanup(struct drm_colorop *colorop) } EXPORT_SYMBOL(drm_colorop_cleanup); +/** + * drm_colorop_destroy - destroy colorop + * @colorop: drm colorop + * + * Destroys @colorop by performing common DRM cleanup and freeing the + * colorop object. This can be used by drivers if they do not + * require any driver-specific teardown. 
+ */ +void drm_colorop_destroy(struct drm_colorop *colorop) +{ + drm_colorop_cleanup(colorop); + kfree(colorop); +} +EXPORT_SYMBOL(drm_colorop_destroy); + /** * drm_colorop_pipeline_destroy - Helper for color pipeline destruction * diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index a3a32f9f918c..3056f3f02597 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -420,6 +420,8 @@ void drm_colorop_atomic_destroy_state(struct drm_colorop *colorop, */ void drm_colorop_reset(struct drm_colorop *colorop); +void drm_colorop_destroy(struct drm_colorop *colorop); + /** * drm_colorop_index - find the index of a registered colorop * @colorop: colorop to find index for -- cgit v1.2.3 From 2864667476a40525511a1e854bcfa7c90392a990 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Mon, 2 Feb 2026 15:11:55 +0530 Subject: drm: Allow driver-managed destruction of colorop objects Some drivers might want to embed struct drm_colorop inside driver-specific objects, similar to planes or CRTCs. In such cases, freeing only the drm_colorop is incorrect. Add a drm_colorop_funcs callback to allow drivers to provide a destroy hook that cleans up the full enclosing object. Make changes in helper functions to accept helper functions as argument. Pass NULL for now to retain current behavior. 
Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Suraj Kandpal Reviewed-by: Uma Shankar Reviewed-by: Alex Hung Acked-by: Jani Nikula Signed-off-by: Suraj Kandpal Link: https://patch.msgid.link/20260202094202.2871478-3-chaitanya.kumar.borah@intel.com --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 18 +++++++------ drivers/gpu/drm/drm_colorop.c | 31 +++++++++++++++------- .../gpu/drm/i915/display/intel_color_pipeline.c | 8 +++--- drivers/gpu/drm/vkms/vkms_colorop.c | 10 ++++--- include/drm/drm_colorop.h | 30 +++++++++++++++++---- 5 files changed, 66 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index a2de3bba8346..dfdb4fb4219f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -72,7 +72,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, + ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, NULL, amdgpu_dm_supported_degam_tfs, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) @@ -89,7 +89,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_mult_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS); + ret = drm_plane_colorop_mult_init(dev, ops[i], plane, NULL, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; @@ -104,7 +104,8 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS); + ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, NULL, + DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; @@ -120,7 +121,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - 
ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, + ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, NULL, amdgpu_dm_supported_shaper_tfs, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) @@ -137,7 +138,8 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES, + ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, NULL, + MAX_COLOR_LUT_ENTRIES, DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) @@ -154,7 +156,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_3dlut_init(dev, ops[i], plane, LUT3D_SIZE, + ret = drm_plane_colorop_3dlut_init(dev, ops[i], plane, NULL, LUT3D_SIZE, DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) @@ -172,7 +174,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, + ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, NULL, amdgpu_dm_supported_blnd_tfs, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) @@ -189,7 +191,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, MAX_COLOR_LUT_ENTRIES, + ret = drm_plane_colorop_curve_1d_lut_init(dev, ops[i], plane, NULL, MAX_COLOR_LUT_ENTRIES, DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) diff --git a/drivers/gpu/drm/drm_colorop.c b/drivers/gpu/drm/drm_colorop.c index c226870fde9e..2bce29176ab3 100644 --- a/drivers/gpu/drm/drm_colorop.c +++ b/drivers/gpu/drm/drm_colorop.c @@ -93,7 +93,8 @@ static const struct drm_prop_enum_list drm_colorop_lut3d_interpolation_list[] = /* Init Helpers */ static int drm_plane_colorop_init(struct drm_device *dev, struct drm_colorop 
*colorop, - struct drm_plane *plane, enum drm_colorop_type type, + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + enum drm_colorop_type type, uint32_t flags) { struct drm_mode_config *config = &dev->mode_config; @@ -109,6 +110,7 @@ static int drm_plane_colorop_init(struct drm_device *dev, struct drm_colorop *co colorop->type = type; colorop->plane = plane; colorop->next = NULL; + colorop->funcs = funcs; list_add_tail(&colorop->head, &config->colorop_list); colorop->index = config->num_colorop++; @@ -218,6 +220,7 @@ EXPORT_SYMBOL(drm_colorop_pipeline_destroy); * @dev: DRM device * @colorop: The drm_colorop object to initialize * @plane: The associated drm_plane + * @funcs: control functions for the new colorop * @supported_tfs: A bitfield of supported drm_plane_colorop_curve_1d_init enum values, * created using BIT(curve_type) and combined with the OR '|' * operator. @@ -225,7 +228,8 @@ EXPORT_SYMBOL(drm_colorop_pipeline_destroy); * @return zero on success, -E value on failure */ int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, u64 supported_tfs, uint32_t flags) + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + u64 supported_tfs, uint32_t flags) { struct drm_prop_enum_list enum_list[DRM_COLOROP_1D_CURVE_COUNT]; int i, len; @@ -246,7 +250,7 @@ int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop * return -EINVAL; } - ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_CURVE, flags); + ret = drm_plane_colorop_init(dev, colorop, plane, funcs, DRM_COLOROP_1D_CURVE, flags); if (ret) return ret; @@ -303,20 +307,23 @@ static int drm_colorop_create_data_prop(struct drm_device *dev, struct drm_color * @dev: DRM device * @colorop: The drm_colorop object to initialize * @plane: The associated drm_plane + * @funcs: control functions for new colorop * @lut_size: LUT size supported by driver * @interpolation: 1D LUT interpolation type * 
@flags: bitmask of misc, see DRM_COLOROP_FLAG_* defines. * @return zero on success, -E value on failure */ int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t lut_size, + struct drm_plane *plane, + const struct drm_colorop_funcs *funcs, + uint32_t lut_size, enum drm_colorop_lut1d_interpolation_type interpolation, uint32_t flags) { struct drm_property *prop; int ret; - ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_1D_LUT, flags); + ret = drm_plane_colorop_init(dev, colorop, plane, funcs, DRM_COLOROP_1D_LUT, flags); if (ret) return ret; @@ -354,11 +361,12 @@ int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_color EXPORT_SYMBOL(drm_plane_colorop_curve_1d_lut_init); int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t flags) + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + uint32_t flags) { int ret; - ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_CTM_3X4, flags); + ret = drm_plane_colorop_init(dev, colorop, plane, funcs, DRM_COLOROP_CTM_3X4, flags); if (ret) return ret; @@ -378,16 +386,18 @@ EXPORT_SYMBOL(drm_plane_colorop_ctm_3x4_init); * @dev: DRM device * @colorop: The drm_colorop object to initialize * @plane: The associated drm_plane + * @funcs: control functions for the new colorop * @flags: bitmask of misc, see DRM_COLOROP_FLAG_* defines. 
* @return zero on success, -E value on failure */ int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t flags) + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + uint32_t flags) { struct drm_property *prop; int ret; - ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_MULTIPLIER, flags); + ret = drm_plane_colorop_init(dev, colorop, plane, funcs, DRM_COLOROP_MULTIPLIER, flags); if (ret) return ret; @@ -406,6 +416,7 @@ EXPORT_SYMBOL(drm_plane_colorop_mult_init); int drm_plane_colorop_3dlut_init(struct drm_device *dev, struct drm_colorop *colorop, struct drm_plane *plane, + const struct drm_colorop_funcs *funcs, uint32_t lut_size, enum drm_colorop_lut3d_interpolation_type interpolation, uint32_t flags) @@ -413,7 +424,7 @@ int drm_plane_colorop_3dlut_init(struct drm_device *dev, struct drm_colorop *col struct drm_property *prop; int ret; - ret = drm_plane_colorop_init(dev, colorop, plane, DRM_COLOROP_3D_LUT, flags); + ret = drm_plane_colorop_init(dev, colorop, plane, funcs, DRM_COLOROP_3D_LUT, flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_color_pipeline.c b/drivers/gpu/drm/i915/display/intel_color_pipeline.c index 04af552b3648..d3d73d60727c 100644 --- a/drivers/gpu/drm/i915/display/intel_color_pipeline.c +++ b/drivers/gpu/drm/i915/display/intel_color_pipeline.c @@ -25,7 +25,7 @@ int _intel_color_pipeline_plane_init(struct drm_plane *plane, struct drm_prop_en colorop = intel_colorop_create(INTEL_PLANE_CB_PRE_CSC_LUT); - ret = drm_plane_colorop_curve_1d_lut_init(dev, &colorop->base, plane, + ret = drm_plane_colorop_curve_1d_lut_init(dev, &colorop->base, plane, NULL, PLANE_DEGAMMA_SIZE, DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, DRM_COLOROP_FLAG_ALLOW_BYPASS); @@ -39,7 +39,7 @@ int _intel_color_pipeline_plane_init(struct drm_plane *plane, struct drm_prop_en prev_op = &colorop->base; colorop = intel_colorop_create(INTEL_PLANE_CB_CSC); - ret = 
drm_plane_colorop_ctm_3x4_init(dev, &colorop->base, plane, + ret = drm_plane_colorop_ctm_3x4_init(dev, &colorop->base, plane, NULL, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) return ret; @@ -52,7 +52,7 @@ int _intel_color_pipeline_plane_init(struct drm_plane *plane, struct drm_prop_en plane->type == DRM_PLANE_TYPE_PRIMARY) { colorop = intel_colorop_create(INTEL_PLANE_CB_3DLUT); - ret = drm_plane_colorop_3dlut_init(dev, &colorop->base, plane, 17, + ret = drm_plane_colorop_3dlut_init(dev, &colorop->base, plane, NULL, 17, DRM_COLOROP_LUT3D_INTERPOLATION_TETRAHEDRAL, true); if (ret) @@ -64,7 +64,7 @@ int _intel_color_pipeline_plane_init(struct drm_plane *plane, struct drm_prop_en } colorop = intel_colorop_create(INTEL_PLANE_CB_POST_CSC_LUT); - ret = drm_plane_colorop_curve_1d_lut_init(dev, &colorop->base, plane, + ret = drm_plane_colorop_curve_1d_lut_init(dev, &colorop->base, plane, NULL, PLANE_GAMMA_SIZE, DRM_COLOROP_LUT1D_INTERPOLATION_LINEAR, DRM_COLOROP_FLAG_ALLOW_BYPASS); diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c index d03a1f2e9c41..9e9dd0494628 100644 --- a/drivers/gpu/drm/vkms/vkms_colorop.c +++ b/drivers/gpu/drm/vkms/vkms_colorop.c @@ -31,7 +31,7 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, supported_tfs, + ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, NULL, supported_tfs, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; @@ -48,7 +48,8 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS); + ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, NULL, + DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; @@ -64,7 +65,8 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = 
drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, DRM_COLOROP_FLAG_ALLOW_BYPASS); + ret = drm_plane_colorop_ctm_3x4_init(dev, ops[i], plane, NULL, + DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; @@ -80,7 +82,7 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr goto cleanup; } - ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, supported_tfs, + ret = drm_plane_colorop_curve_1d_init(dev, ops[i], plane, NULL, supported_tfs, DRM_COLOROP_FLAG_ALLOW_BYPASS); if (ret) goto cleanup; diff --git a/include/drm/drm_colorop.h b/include/drm/drm_colorop.h index 3056f3f02597..bd082854ca74 100644 --- a/include/drm/drm_colorop.h +++ b/include/drm/drm_colorop.h @@ -187,6 +187,19 @@ struct drm_colorop_state { struct drm_atomic_state *state; }; +/** + * struct drm_colorop_funcs - driver colorop control functions + */ +struct drm_colorop_funcs { + /** + * @destroy: + * + * Clean up colorop resources. This is called at driver unload time + * through drm_mode_config_cleanup() + */ + void (*destroy)(struct drm_colorop *colorop); +}; + /** * struct drm_colorop - DRM color operation control structure * @@ -362,6 +375,8 @@ struct drm_colorop { */ struct drm_property *next_property; + /** @funcs: colorop control functions */ + const struct drm_colorop_funcs *funcs; }; #define obj_to_colorop(x) container_of(x, struct drm_colorop, base) @@ -390,17 +405,22 @@ void drm_colorop_pipeline_destroy(struct drm_device *dev); void drm_colorop_cleanup(struct drm_colorop *colorop); int drm_plane_colorop_curve_1d_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, u64 supported_tfs, uint32_t flags); + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + u64 supported_tfs, uint32_t flags); int drm_plane_colorop_curve_1d_lut_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t lut_size, + struct drm_plane *plane, + const struct drm_colorop_funcs *funcs, + uint32_t lut_size, 
enum drm_colorop_lut1d_interpolation_type interpolation, uint32_t flags); int drm_plane_colorop_ctm_3x4_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t flags); + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + uint32_t flags); int drm_plane_colorop_mult_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, uint32_t flags); + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, + uint32_t flags); int drm_plane_colorop_3dlut_init(struct drm_device *dev, struct drm_colorop *colorop, - struct drm_plane *plane, + struct drm_plane *plane, const struct drm_colorop_funcs *funcs, uint32_t lut_size, enum drm_colorop_lut3d_interpolation_type interpolation, uint32_t flags); -- cgit v1.2.3 From 95ffa10056b33bf5a90090b02da2edd52e1e281c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 28 Jan 2026 13:43:45 +0100 Subject: drm/atomic: Make drm_atomic_private_obj_init fallible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we're going to move the drm_private_obj state allocation to a callback, we need to be able to deal with its possible failure. Make drm_private_obj_init return an error code on failure. Suggested-by: Ville Syrjälä Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20260128-drm-private-obj-reset-v4-1-90891fa3d3b0@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_atomic.c | 14 +++++++++----- include/drm/drm_atomic.h | 8 ++++---- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 52738b80ddbe..4191a8333fc4 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -927,12 +927,14 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, * * Initialize the private object, which can be embedded into any * driver private object that needs its own atomic state. 
+ * + * RETURNS: + * Zero on success, error code on failure */ -void -drm_atomic_private_obj_init(struct drm_device *dev, - struct drm_private_obj *obj, - struct drm_private_state *state, - const struct drm_private_state_funcs *funcs) +int drm_atomic_private_obj_init(struct drm_device *dev, + struct drm_private_obj *obj, + struct drm_private_state *state, + const struct drm_private_state_funcs *funcs) { memset(obj, 0, sizeof(*obj)); @@ -944,6 +946,8 @@ drm_atomic_private_obj_init(struct drm_device *dev, list_add_tail(&obj->head, &dev->mode_config.privobj_list); state->obj = obj; + + return 0; } EXPORT_SYMBOL(drm_atomic_private_obj_init); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 178f8f62c80f..712f5fb977bf 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -723,10 +723,10 @@ struct drm_connector_state * __must_check drm_atomic_get_connector_state(struct drm_atomic_state *state, struct drm_connector *connector); -void drm_atomic_private_obj_init(struct drm_device *dev, - struct drm_private_obj *obj, - struct drm_private_state *state, - const struct drm_private_state_funcs *funcs); +int drm_atomic_private_obj_init(struct drm_device *dev, + struct drm_private_obj *obj, + struct drm_private_state *state, + const struct drm_private_state_funcs *funcs); void drm_atomic_private_obj_fini(struct drm_private_obj *obj); struct drm_private_state * __must_check -- cgit v1.2.3 From 47b5ac7daa46e2bc8e4916d856fdc036ac145bb6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 28 Jan 2026 13:43:46 +0100 Subject: drm/atomic: Add new atomic_create_state callback to drm_private_obj The drm_private_obj initialization was inconsistent with the rest of the KMS objects. Indeed, it required passing a preallocated state to drm_atomic_private_obj_init(), while all the other objects would have a reset callback that would be called later on to create the state. However, reset really is meant to reset the hardware and software state.
That it creates an initial state is a side-effect that has been used in all objects but drm_private_obj. This is made more complex since some drm_private_obj, the DisplayPort ones in particular, need to be persistent across a suspend/resume cycle, and such a cycle would call drm_mode_config_reset(). Thus, we need to add a new callback to allocate a pristine state for a given private object. This discussion has also come up during the atomic state readout discussion, so it might be introduced into the other objects later on. Until all drivers are converted to that new allocation pattern, we will only call it if the passed state is NULL. This will be removed eventually. Reviewed-by: Dmitry Baryshkov Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20260128-drm-private-obj-reset-v4-2-90891fa3d3b0@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_atomic.c | 18 ++++++++++++++++-- include/drm/drm_atomic.h | 13 +++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4191a8333fc4..e3029c8f02e5 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -941,11 +941,25 @@ int drm_atomic_private_obj_init(struct drm_device *dev, drm_modeset_lock_init(&obj->lock); obj->dev = dev; - obj->state = state; obj->funcs = funcs; list_add_tail(&obj->head, &dev->mode_config.privobj_list); - state->obj = obj; + /* + * Not all users of drm_atomic_private_obj_init have been + * converted to using &drm_private_obj_funcs.atomic_create_state yet. + * For the time being, let's only call reset if the passed state is + * NULL. Otherwise, we will fallback to the previous behaviour.
+ */ + if (!state) { + state = obj->funcs->atomic_create_state(obj); + if (IS_ERR(state)) + return PTR_ERR(state); + + obj->state = state; + } else { + obj->state = state; + state->obj = obj; + } return 0; } diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 712f5fb977bf..0b1b32bcd2bd 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -261,6 +261,19 @@ struct drm_private_state; * drm_atomic_get_private_obj_state(). */ struct drm_private_state_funcs { + /** + * @atomic_create_state: + * + * Allocates a pristine, initialized, state for the private + * object and returns it. + * + * RETURNS: + * + * A new, pristine, private state instance or an error pointer + * on failure. + */ + struct drm_private_state *(*atomic_create_state)(struct drm_private_obj *obj); + /** * @atomic_duplicate_state: * -- cgit v1.2.3 From e7be39ed171662474d6d5c9a83d790ef7d244bcd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 28 Jan 2026 13:43:47 +0100 Subject: drm/atomic-helper: Add private_obj atomic_create_state helper Now that we have an atomic_create_state callback for drm_private_objs, we can provide a helper for it. It's somewhat different from the other similar helpers though, because we definitely expect drm_private_obj to be subclassed. It wouldn't make sense for a driver to use it as-is. So we can't provide a straight implementation of the atomic_create_state callback, but rather we provide the parts that will deal with the drm_private_obj initialization, and we will leave the allocation and initialization of the subclass to drivers. 
Reviewed-by: Dmitry Baryshkov Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20260128-drm-private-obj-reset-v4-3-90891fa3d3b0@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_atomic_state_helper.c | 22 ++++++++++++++++++++++ include/drm/drm_atomic_state_helper.h | 3 +++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index cee6d8fc44ad..d21f32f0ad51 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -714,6 +714,28 @@ void drm_atomic_helper_connector_destroy_state(struct drm_connector *connector, } EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state); +/** + * __drm_atomic_helper_private_obj_create_state - initializes private object state + * @obj: private object + * @state: new state to initialize + * + * Initializes the newly allocated @state, usually required when + * initializing the drivers. + * + * @obj is assumed to be zeroed. + * + * This is useful for drivers that use private states. 
+ */ +void __drm_atomic_helper_private_obj_create_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + if (state) + state->obj = obj; + + obj->state = state; +} +EXPORT_SYMBOL(__drm_atomic_helper_private_obj_create_state); + /** * __drm_atomic_helper_private_obj_duplicate_state - copy atomic private state * @obj: CRTC object diff --git a/include/drm/drm_atomic_state_helper.h b/include/drm/drm_atomic_state_helper.h index b9740edb2658..900672c6ea90 100644 --- a/include/drm/drm_atomic_state_helper.h +++ b/include/drm/drm_atomic_state_helper.h @@ -84,6 +84,9 @@ void __drm_atomic_helper_connector_destroy_state(struct drm_connector_state *state); void drm_atomic_helper_connector_destroy_state(struct drm_connector *connector, struct drm_connector_state *state); + +void __drm_atomic_helper_private_obj_create_state(struct drm_private_obj *obj, + struct drm_private_state *state); void __drm_atomic_helper_private_obj_duplicate_state(struct drm_private_obj *obj, struct drm_private_state *state); -- cgit v1.2.3 From 95cef38e70250234a254e6228eb7342b6deaaffa Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:18 +0100 Subject: firmware: google: Export coreboot table entries Move types for coreboot table entries to <linux/coreboot.h>. This allows drivers in other subsystems to use these structures.
Signed-off-by: Thomas Zimmermann Acked-by: Tzung-Bi Shih Acked-by: Julius Werner Link: https://patch.msgid.link/20260217155836.96267-9-tzimmermann@suse.de --- MAINTAINERS | 1 + drivers/firmware/google/coreboot_table.c | 10 ++++ drivers/firmware/google/coreboot_table.h | 60 +---------------------- drivers/firmware/google/framebuffer-coreboot.c | 2 - include/linux/coreboot.h | 66 ++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 61 deletions(-) create mode 100644 include/linux/coreboot.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 4a2d5e8f0f63..d0dfcfd15e59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10753,6 +10753,7 @@ L: chrome-platform@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git F: drivers/firmware/google/ +F: include/linux/coreboot.h GOOGLE TENSOR SoC SUPPORT M: Peter Griffin diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index a031d6fe6bc5..c769631ea15d 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -22,6 +22,16 @@ #include "coreboot_table.h" +/* Coreboot table header structure */ +struct coreboot_table_header { + char signature[4]; + u32 header_bytes; + u32 header_checksum; + u32 table_bytes; + u32 table_checksum; + u32 table_entries; +}; + #define CB_DEV(d) container_of(d, struct coreboot_device, dev) #define CB_DRV(d) container_of_const(d, struct coreboot_driver, drv) diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h index 17e9e5c3f6e1..616ca3903e5c 100644 --- a/drivers/firmware/google/coreboot_table.h +++ b/drivers/firmware/google/coreboot_table.h @@ -12,67 +12,9 @@ #ifndef __COREBOOT_TABLE_H #define __COREBOOT_TABLE_H +#include #include -struct coreboot_device_id; - -/* Coreboot table header structure */ -struct coreboot_table_header { - char signature[4]; - u32 header_bytes; - u32 header_checksum; - u32 
table_bytes; - u32 table_checksum; - u32 table_entries; -}; - -/* List of coreboot entry structures that is used */ -/* Generic */ -struct coreboot_table_entry { - u32 tag; - u32 size; -}; - -/* Points to a CBMEM entry */ -struct lb_cbmem_ref { - u32 tag; - u32 size; - - u64 cbmem_addr; -}; - -#define LB_TAG_CBMEM_ENTRY 0x31 - -/* Corresponds to LB_TAG_CBMEM_ENTRY */ -struct lb_cbmem_entry { - u32 tag; - u32 size; - - u64 address; - u32 entry_size; - u32 id; -}; - -/* Describes framebuffer setup by coreboot */ -struct lb_framebuffer { - u32 tag; - u32 size; - - u64 physical_address; - u32 x_resolution; - u32 y_resolution; - u32 bytes_per_line; - u8 bits_per_pixel; - u8 red_mask_pos; - u8 red_mask_size; - u8 green_mask_pos; - u8 green_mask_size; - u8 blue_mask_pos; - u8 blue_mask_size; - u8 reserved_mask_pos; - u8 reserved_mask_size; -}; - /* A device, additionally with information from coreboot. */ struct coreboot_device { struct device dev; diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c index 81aa522edb1e..fab3f28655d3 100644 --- a/drivers/firmware/google/framebuffer-coreboot.c +++ b/drivers/firmware/google/framebuffer-coreboot.c @@ -21,8 +21,6 @@ #include "coreboot_table.h" -#define CB_TAG_FRAMEBUFFER 0x12 - #if defined(CONFIG_PCI) static bool framebuffer_pci_dev_is_enabled(struct pci_dev *pdev) { diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h new file mode 100644 index 000000000000..48705b439c6e --- /dev/null +++ b/include/linux/coreboot.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * coreboot.h + * + * Coreboot device and driver interfaces. + * + * Copyright 2014 Gerd Hoffmann + * Copyright 2017 Google Inc. 
+ * Copyright 2017 Samuel Holland + */ + +#ifndef _LINUX_COREBOOT_H +#define _LINUX_COREBOOT_H + +#include + +/* List of coreboot entry structures that is used */ + +#define CB_TAG_FRAMEBUFFER 0x12 +#define LB_TAG_CBMEM_ENTRY 0x31 + +/* Generic */ +struct coreboot_table_entry { + u32 tag; + u32 size; +}; + +/* Points to a CBMEM entry */ +struct lb_cbmem_ref { + u32 tag; + u32 size; + + u64 cbmem_addr; +}; + +/* Corresponds to LB_TAG_CBMEM_ENTRY */ +struct lb_cbmem_entry { + u32 tag; + u32 size; + + u64 address; + u32 entry_size; + u32 id; +}; + +/* Describes framebuffer setup by coreboot */ +struct lb_framebuffer { + u32 tag; + u32 size; + + u64 physical_address; + u32 x_resolution; + u32 y_resolution; + u32 bytes_per_line; + u8 bits_per_pixel; + u8 red_mask_pos; + u8 red_mask_size; + u8 green_mask_pos; + u8 green_mask_size; + u8 blue_mask_pos; + u8 blue_mask_size; + u8 reserved_mask_pos; + u8 reserved_mask_size; +}; + +#endif /* _LINUX_COREBOOT_H */ -- cgit v1.2.3 From 27fc52b5505a3acca96b884a4bf1345344e5a566 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:19 +0100 Subject: firmware: google: Pack structures for coreboot table entries Pack the fields in the coreboot table entries. These entries are part of the coreboot ABI, so they don't follow regular calling conventions. Fields of type u64 are aligned to boundaries of 4 bytes instead of 8. [1] So far this has not been a problem. In the future, padding bytes should be added where explicit alignment is required. 
Signed-off-by: Thomas Zimmermann Link: https://github.com/coreboot/coreboot/blob/main/payloads/libpayload/include/coreboot_tables.h#L96 # [1] Suggested-by: Julius Werner Acked-by: Julius Werner Acked-by: Tzung-Bi Shih Link: https://patch.msgid.link/20260217155836.96267-10-tzimmermann@suse.de --- include/linux/coreboot.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h index 48705b439c6e..5746b99a070d 100644 --- a/include/linux/coreboot.h +++ b/include/linux/coreboot.h @@ -12,8 +12,11 @@ #ifndef _LINUX_COREBOOT_H #define _LINUX_COREBOOT_H +#include #include +typedef __aligned(4) u64 cb_u64; + /* List of coreboot entry structures that is used */ #define CB_TAG_FRAMEBUFFER 0x12 @@ -30,7 +33,7 @@ struct lb_cbmem_ref { u32 tag; u32 size; - u64 cbmem_addr; + cb_u64 cbmem_addr; }; /* Corresponds to LB_TAG_CBMEM_ENTRY */ @@ -38,7 +41,7 @@ struct lb_cbmem_entry { u32 tag; u32 size; - u64 address; + cb_u64 address; u32 entry_size; u32 id; }; @@ -48,7 +51,7 @@ struct lb_framebuffer { u32 tag; u32 size; - u64 physical_address; + cb_u64 physical_address; u32 x_resolution; u32 y_resolution; u32 bytes_per_line; -- cgit v1.2.3 From a29a1f0ec8d69ee917a9d4c84b844df0decff0ef Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:21 +0100 Subject: drm/sysfb: corebootdrm: Add DRM driver for coreboot framebuffers Add corebootdrm, a DRM driver for coreboot framebuffers. The driver supports a pre-initialized framebuffer with various packed RGB formats. The driver code is fairly small and uses the same logic as the other sysfb drivers. Most of the implementation comes from existing sysfb helpers. Until now, coreboot relied on simpledrm or simplefb for boot-up graphics output. Initialize the platform device for corebootdrm in the same place in framebuffer_probe(). With a later commit, the simple-framebuffer should be removed. 
v4: - sort include statements (Tzung-Bi) v3: - comment on _HAS_LFB semantics (Tzung-Bi) - fix typo in commit description (Tzung-Bi) - comment on simple-framebuffer being obsolete for coreboot v2: - reimplement as platform driver - limit resources and mappings to known framebuffer memory; no page alignment - create corebootdrm device from coreboot framebuffer code Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: Julius Werner Acked-by: Tzung-Bi Shih # coreboot Link: https://patch.msgid.link/20260217155836.96267-12-tzimmermann@suse.de --- drivers/firmware/google/Kconfig | 3 +- drivers/firmware/google/framebuffer-coreboot.c | 22 +- drivers/gpu/drm/sysfb/Kconfig | 16 + drivers/gpu/drm/sysfb/Makefile | 1 + drivers/gpu/drm/sysfb/corebootdrm.c | 412 +++++++++++++++++++++++++ include/linux/coreboot.h | 8 + 6 files changed, 458 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/sysfb/corebootdrm.c (limited to 'include') diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 3ab3e089328b..b78c644fa253 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -63,7 +63,8 @@ config GOOGLE_FRAMEBUFFER_COREBOOT help This option enables the kernel to search for a framebuffer in the coreboot table. If found, it is registered with a platform - device of type simple-framebuffer. + device of type coreboot-framebuffer. Using the old device of + type simple-framebuffer is deprecated. 
config GOOGLE_MEMCONSOLE_COREBOOT tristate "Firmware Memory Console" diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c index fab3f28655d3..2c63a9bd0dcb 100644 --- a/drivers/firmware/google/framebuffer-coreboot.c +++ b/drivers/firmware/google/framebuffer-coreboot.c @@ -76,22 +76,23 @@ static struct device *framebuffer_parent_dev(struct resource *res) return NULL; } -static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; - static int framebuffer_probe(struct coreboot_device *dev) { - int i; struct lb_framebuffer *fb = &dev->framebuffer; struct device *parent; struct platform_device *pdev; struct resource res; int ret; +#if !IS_ENABLED(CONFIG_DRM_COREBOOTDRM) struct simplefb_platform_data pdata = { .width = fb->x_resolution, .height = fb->y_resolution, .stride = fb->bytes_per_line, .format = NULL, }; + int i; + static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; +#endif /* * On coreboot systems, the advertised LB_TAG_FRAMEBUFFER entry @@ -118,6 +119,20 @@ static int framebuffer_probe(struct coreboot_device *dev) if (IS_ERR(parent)) return PTR_ERR(parent); +#if IS_ENABLED(CONFIG_DRM_COREBOOTDRM) + pdev = platform_device_register_resndata(parent, "coreboot-framebuffer", 0, + &res, 1, fb, fb->size); + if (IS_ERR(pdev)) { + pr_warn("coreboot: could not register framebuffer\n"); + ret = PTR_ERR(pdev); + goto out_put_device_parent; + } +#else + /* + * FIXME: Coreboot systems should use a driver that binds to + * coreboot-framebuffer devices. Remove support for + * simple-framebuffer at some point. 
+ */ for (i = 0; i < ARRAY_SIZE(formats); ++i) { if (fb->bits_per_pixel == formats[i].bits_per_pixel && fb->red_mask_pos == formats[i].red.offset && @@ -142,6 +157,7 @@ static int framebuffer_probe(struct coreboot_device *dev) pr_warn("coreboot: could not register framebuffer\n"); goto out_put_device_parent; } +#endif ret = 0; diff --git a/drivers/gpu/drm/sysfb/Kconfig b/drivers/gpu/drm/sysfb/Kconfig index 9c9884c7efc6..2559ead6cf1f 100644 --- a/drivers/gpu/drm/sysfb/Kconfig +++ b/drivers/gpu/drm/sysfb/Kconfig @@ -7,6 +7,22 @@ config DRM_SYSFB_HELPER tristate depends on DRM +config DRM_COREBOOTDRM + tristate "Coreboot framebuffer driver" + depends on DRM && MMU + depends on GOOGLE_FRAMEBUFFER_COREBOOT + select APERTURE_HELPERS + select DRM_CLIENT_SELECTION + select DRM_GEM_SHMEM_HELPER + select DRM_KMS_HELPER + select DRM_SYSFB_HELPER + help + DRM driver for coreboot-provided framebuffers. + + This driver assumes that the display hardware has been initialized + by coreboot firmware before the kernel boots. Scanout buffer, size, + and display format must be provided via coreboot framebuffer device. 
+ config DRM_EFIDRM tristate "EFI framebuffer driver" depends on DRM && MMU && EFI && (!SYSFB_SIMPLEFB || COMPILE_TEST) diff --git a/drivers/gpu/drm/sysfb/Makefile b/drivers/gpu/drm/sysfb/Makefile index a156c496413d..85c9087ab03d 100644 --- a/drivers/gpu/drm/sysfb/Makefile +++ b/drivers/gpu/drm/sysfb/Makefile @@ -6,6 +6,7 @@ drm_sysfb_helper-y := \ drm_sysfb_helper-$(CONFIG_SCREEN_INFO) += drm_sysfb_screen_info.o obj-$(CONFIG_DRM_SYSFB_HELPER) += drm_sysfb_helper.o +obj-$(CONFIG_DRM_COREBOOTDRM) += corebootdrm.o obj-$(CONFIG_DRM_EFIDRM) += efidrm.o obj-$(CONFIG_DRM_OFDRM) += ofdrm.o obj-$(CONFIG_DRM_SIMPLEDRM) += simpledrm.o diff --git a/drivers/gpu/drm/sysfb/corebootdrm.c b/drivers/gpu/drm/sysfb/corebootdrm.c new file mode 100644 index 000000000000..745318580a5d --- /dev/null +++ b/drivers/gpu/drm/sysfb/corebootdrm.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "drm_sysfb_helper.h" + +#define DRIVER_NAME "corebootdrm" +#define DRIVER_DESC "DRM driver for Coreboot framebuffers" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 + +static const struct drm_format_info * +corebootdrm_get_format_fb(struct drm_device *dev, const struct lb_framebuffer *fb) +{ + static const struct drm_sysfb_format formats[] = { + { PIXEL_FORMAT_XRGB1555, DRM_FORMAT_XRGB1555, }, + { PIXEL_FORMAT_RGB565, DRM_FORMAT_RGB565, }, + { PIXEL_FORMAT_RGB888, DRM_FORMAT_RGB888, }, + { PIXEL_FORMAT_XRGB8888, DRM_FORMAT_XRGB8888, }, + { PIXEL_FORMAT_XBGR8888, DRM_FORMAT_XBGR8888, }, + { PIXEL_FORMAT_XRGB2101010, DRM_FORMAT_XRGB2101010, }, + }; + const struct pixel_format pixel = { + .bits_per_pixel = fb->bits_per_pixel, + .indexed = false, + .alpha = { + .offset = 0, + .length = 0, + }, + .red = { + .offset = fb->red_mask_pos, + .length = fb->red_mask_size, + }, + .green = { + 
.offset = fb->green_mask_pos, + .length = fb->green_mask_size, + }, + .blue = { + .offset = fb->blue_mask_pos, + .length = fb->blue_mask_size, + }, + }; + + return drm_sysfb_get_format(dev, formats, ARRAY_SIZE(formats), &pixel); +} + +static int corebootdrm_get_width_fb(struct drm_device *dev, const struct lb_framebuffer *fb) +{ + return drm_sysfb_get_validated_int0(dev, "width", fb->x_resolution, INT_MAX); +} + +static int corebootdrm_get_height_fb(struct drm_device *dev, const struct lb_framebuffer *fb) +{ + return drm_sysfb_get_validated_int0(dev, "height", fb->y_resolution, INT_MAX); +} + +static int corebootdrm_get_pitch_fb(struct drm_device *dev, const struct drm_format_info *format, + unsigned int width, const struct lb_framebuffer *fb) +{ + u64 bytes_per_line = fb->bytes_per_line; + + if (!bytes_per_line) + bytes_per_line = drm_format_info_min_pitch(format, 0, width); + + return drm_sysfb_get_validated_int0(dev, "pitch", bytes_per_line, INT_MAX); +} + +static resource_size_t corebootdrm_get_size_fb(struct drm_device *dev, unsigned int height, + unsigned int pitch, + const struct lb_framebuffer *fb) +{ + resource_size_t size; + + if (check_mul_overflow(height, pitch, &size)) + return 0; + + return size; +} + +static phys_addr_t corebootdrm_get_address_fb(struct drm_device *dev, resource_size_t size, + const struct lb_framebuffer *fb) +{ + if (size > PHYS_ADDR_MAX) + return 0; + if (!fb->physical_address) + return 0; + if (fb->physical_address > (PHYS_ADDR_MAX - size)) + return 0; + + return fb->physical_address; +} + +/* + * Simple Framebuffer device + */ + +struct corebootdrm_device { + struct drm_sysfb_device sysfb; + + /* modesetting */ + u32 formats[DRM_SYSFB_PLANE_NFORMATS(1)]; + struct drm_plane primary_plane; + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; +}; + +/* + * Modesetting + */ + +static const u64 corebootdrm_primary_plane_format_modifiers[] = { + DRM_SYSFB_PLANE_FORMAT_MODIFIERS, +}; + +static const 
struct drm_plane_helper_funcs corebootdrm_primary_plane_helper_funcs = { + DRM_SYSFB_PLANE_HELPER_FUNCS, +}; + +static const struct drm_plane_funcs corebootdrm_primary_plane_funcs = { + DRM_SYSFB_PLANE_FUNCS, + .destroy = drm_plane_cleanup, +}; + +static const struct drm_crtc_helper_funcs corebootdrm_crtc_helper_funcs = { + DRM_SYSFB_CRTC_HELPER_FUNCS, +}; + +static const struct drm_crtc_funcs corebootdrm_crtc_funcs = { + DRM_SYSFB_CRTC_FUNCS, + .destroy = drm_crtc_cleanup, +}; + +static const struct drm_encoder_funcs corebootdrm_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static const struct drm_connector_helper_funcs corebootdrm_connector_helper_funcs = { + DRM_SYSFB_CONNECTOR_HELPER_FUNCS, +}; + +static const struct drm_connector_funcs corebootdrm_connector_funcs = { + DRM_SYSFB_CONNECTOR_FUNCS, + .destroy = drm_connector_cleanup, +}; + +static const struct drm_mode_config_funcs corebootdrm_mode_config_funcs = { + DRM_SYSFB_MODE_CONFIG_FUNCS, +}; + +static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev) +{ + struct drm_sysfb_device *sysfb = &cdev->sysfb; + struct drm_device *dev = &sysfb->dev; + const struct drm_format_info *format = sysfb->fb_format; + unsigned int width = sysfb->fb_mode.hdisplay; + unsigned int height = sysfb->fb_mode.vdisplay; + struct drm_plane *primary_plane; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + struct drm_connector *connector; + size_t nformats; + int ret; + + ret = drmm_mode_config_init(dev); + if (ret) + return ret; + + dev->mode_config.min_width = width; + dev->mode_config.max_width = max_t(unsigned int, width, DRM_SHADOW_PLANE_MAX_WIDTH); + dev->mode_config.min_height = height; + dev->mode_config.max_height = max_t(unsigned int, height, DRM_SHADOW_PLANE_MAX_HEIGHT); + dev->mode_config.funcs = &corebootdrm_mode_config_funcs; + dev->mode_config.preferred_depth = format->depth; + + /* Primary plane */ + + nformats = drm_sysfb_build_fourcc_list(dev, &format->format, 1, + cdev->formats, 
ARRAY_SIZE(cdev->formats)); + + primary_plane = &cdev->primary_plane; + ret = drm_universal_plane_init(dev, primary_plane, 0, &corebootdrm_primary_plane_funcs, + cdev->formats, nformats, + corebootdrm_primary_plane_format_modifiers, + DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) + return ret; + drm_plane_helper_add(primary_plane, &corebootdrm_primary_plane_helper_funcs); + drm_plane_enable_fb_damage_clips(primary_plane); + + /* CRTC */ + + crtc = &cdev->crtc; + ret = drm_crtc_init_with_planes(dev, crtc, primary_plane, NULL, + &corebootdrm_crtc_funcs, NULL); + if (ret) + return ret; + drm_crtc_helper_add(crtc, &corebootdrm_crtc_helper_funcs); + + /* Encoder */ + + encoder = &cdev->encoder; + ret = drm_encoder_init(dev, encoder, &corebootdrm_encoder_funcs, + DRM_MODE_ENCODER_NONE, NULL); + if (ret) + return ret; + encoder->possible_crtcs = drm_crtc_mask(crtc); + + /* Connector */ + + connector = &cdev->connector; + ret = drm_connector_init(dev, connector, &corebootdrm_connector_funcs, + DRM_MODE_CONNECTOR_Unknown); + if (ret) + return ret; + drm_connector_helper_add(connector, &corebootdrm_connector_helper_funcs); + drm_connector_set_panel_orientation_with_quirk(connector, + DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + width, height); + + ret = drm_connector_attach_encoder(connector, encoder); + if (ret) + return ret; + + return 0; +} + +/* + * DRM driver + */ + +DEFINE_DRM_GEM_FOPS(corebootdrm_fops); + +static struct drm_driver corebootdrm_drm_driver = { + DRM_GEM_SHMEM_DRIVER_OPS, + DRM_FBDEV_SHMEM_DRIVER_OPS, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, + .fops = &corebootdrm_fops, +}; + +/* + * Coreboot driver + */ + +static int corebootdrm_probe(struct platform_device *pdev) +{ + const struct lb_framebuffer *fb = dev_get_platdata(&pdev->dev); + struct corebootdrm_device *cdev; + struct drm_sysfb_device *sysfb; + struct drm_device *dev; + const struct 
drm_format_info *format; + int width, height, pitch; + resource_size_t size; + phys_addr_t address; + struct resource *res, *mem = NULL; + struct resource aperture; + void __iomem *screen_base; + int ret; + + cdev = devm_drm_dev_alloc(&pdev->dev, &corebootdrm_drm_driver, + struct corebootdrm_device, sysfb.dev); + if (IS_ERR(cdev)) + return PTR_ERR(cdev); + platform_set_drvdata(pdev, cdev); + + sysfb = &cdev->sysfb; + dev = &sysfb->dev; + + if (!fb) { + drm_err(dev, "coreboot framebuffer not found\n"); + return -EINVAL; + } else if (!LB_FRAMEBUFFER_HAS_LFB(fb)) { + drm_err(dev, "coreboot framebuffer entry too small\n"); + return -EINVAL; + } + + /* + * Hardware settings + */ + + format = corebootdrm_get_format_fb(dev, fb); + if (!format) + return -EINVAL; + width = corebootdrm_get_width_fb(dev, fb); + if (width < 0) + return width; + height = corebootdrm_get_height_fb(dev, fb); + if (height < 0) + return height; + pitch = corebootdrm_get_pitch_fb(dev, format, width, fb); + if (pitch < 0) + return pitch; + size = corebootdrm_get_size_fb(dev, height, pitch, fb); + if (!size) + return -EINVAL; + address = corebootdrm_get_address_fb(dev, size, fb); + if (!address) + return -EINVAL; + + sysfb->fb_mode = drm_sysfb_mode(width, height, 0, 0); + sysfb->fb_format = format; + sysfb->fb_pitch = pitch; + + drm_dbg(dev, "display mode={" DRM_MODE_FMT "}\n", DRM_MODE_ARG(&sysfb->fb_mode)); + drm_dbg(dev, "framebuffer format=%p4cc, size=%dx%d, pitch=%d byte\n", + &format->format, width, height, pitch); + + /* + * Memory management + */ + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + drm_err(dev, "memory resource not found\n"); + return -EINVAL; + } + + mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res), + dev->driver->name); + if (!mem) { + drm_warn(dev, "could not acquire memory resource at %pr\n", res); + /* + * We cannot make this fatal. Sometimes this comes from magic + * spaces our resource handlers simply don't know about. 
Use + * the memory resource as-is and try to map that instead. + */ + mem = res; + } + + drm_dbg(dev, "using memory resource at %pr\n", mem); + + aperture = DEFINE_RES_MEM(address, size); + if (!resource_contains(mem, &aperture)) { + drm_err(dev, "framebuffer aperture at invalid memory range %pr\n", &aperture); + return -EINVAL; + } + + ret = devm_aperture_acquire_for_platform_device(pdev, address, size); + if (ret) { + drm_err(dev, "could not acquire framebuffer aperture: %d\n", ret); + return ret; + } + + screen_base = devm_ioremap_wc(&pdev->dev, address, size); + if (!screen_base) + return -ENOMEM; + + iosys_map_set_vaddr_iomem(&sysfb->fb_addr, screen_base); + + /* + * DRM mode setting and registration + */ + + ret = corebootdrm_mode_config_init(cdev); + if (ret) + return ret; + + drm_mode_config_reset(dev); + + ret = drm_dev_register(dev, 0); + if (ret) + return ret; + + drm_client_setup(dev, sysfb->fb_format); + + return 0; +} + +static void corebootdrm_remove(struct platform_device *pdev) +{ + struct corebootdrm_device *cdev = platform_get_drvdata(pdev); + struct drm_device *dev = &cdev->sysfb.dev; + + drm_dev_unplug(dev); +} + +static struct platform_driver corebootdrm_platform_driver = { + .driver = { + .name = "coreboot-framebuffer", + }, + .probe = corebootdrm_probe, + .remove = corebootdrm_remove, +}; + +module_platform_driver(corebootdrm_platform_driver); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h index 5746b99a070d..885da106fee3 100644 --- a/include/linux/coreboot.h +++ b/include/linux/coreboot.h @@ -13,6 +13,7 @@ #define _LINUX_COREBOOT_H #include +#include #include typedef __aligned(4) u64 cb_u64; @@ -66,4 +67,11 @@ struct lb_framebuffer { u8 reserved_mask_size; }; +/* + * True if the coreboot-provided data is large enough to hold information + * on the linear framebuffer. False otherwise. 
+ */ +#define LB_FRAMEBUFFER_HAS_LFB(__fb) \ + ((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size)) + #endif /* _LINUX_COREBOOT_H */ -- cgit v1.2.3 From 058fc04b8587ad07a86dfa8f99d8d99db0a55443 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Feb 2026 16:56:22 +0100 Subject: drm/sysfb: corebootdrm: Support panel orientation Add fields and constants for coreboot framebuffer orientation. Set corebootdrm's DRM connector state from the values. Not all firmware provides orientation, so make it optional. Systems without, continue to use unknown orientation. v3: - comment on _HAS_ORIENTATION semantics (Tzung-Bi) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: Julius Werner Acked-by: Tzung-Bi Shih # coreboot Link: https://patch.msgid.link/20260217155836.96267-13-tzimmermann@suse.de --- drivers/gpu/drm/sysfb/corebootdrm.c | 30 ++++++++++++++++++++++++++---- include/linux/coreboot.h | 13 +++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/sysfb/corebootdrm.c b/drivers/gpu/drm/sysfb/corebootdrm.c index 745318580a5d..5dc6f3c76f7b 100644 --- a/drivers/gpu/drm/sysfb/corebootdrm.c +++ b/drivers/gpu/drm/sysfb/corebootdrm.c @@ -110,6 +110,26 @@ static phys_addr_t corebootdrm_get_address_fb(struct drm_device *dev, resource_s return fb->physical_address; } +static enum drm_panel_orientation corebootdrm_get_orientation_fb(struct drm_device *dev, + const struct lb_framebuffer *fb) +{ + if (!LB_FRAMEBUFFER_HAS_ORIENTATION(fb)) + return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; + + switch (fb->orientation) { + case LB_FRAMEBUFFER_ORIENTATION_NORMAL: + return DRM_MODE_PANEL_ORIENTATION_NORMAL; + case LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP: + return DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP; + case LB_FRAMEBUFFER_ORIENTATION_LEFT_UP: + return DRM_MODE_PANEL_ORIENTATION_LEFT_UP; + case LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP: + return DRM_MODE_PANEL_ORIENTATION_RIGHT_UP; + } + + 
return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; +} + /* * Simple Framebuffer device */ @@ -168,7 +188,8 @@ static const struct drm_mode_config_funcs corebootdrm_mode_config_funcs = { DRM_SYSFB_MODE_CONFIG_FUNCS, }; -static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev) +static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev, + enum drm_panel_orientation orientation) { struct drm_sysfb_device *sysfb = &cdev->sysfb; struct drm_device *dev = &sysfb->dev; @@ -234,8 +255,7 @@ static int corebootdrm_mode_config_init(struct corebootdrm_device *cdev) if (ret) return ret; drm_connector_helper_add(connector, &corebootdrm_connector_helper_funcs); - drm_connector_set_panel_orientation_with_quirk(connector, - DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + drm_connector_set_panel_orientation_with_quirk(connector, orientation, width, height); ret = drm_connector_attach_encoder(connector, encoder); @@ -276,6 +296,7 @@ static int corebootdrm_probe(struct platform_device *pdev) int width, height, pitch; resource_size_t size; phys_addr_t address; + enum drm_panel_orientation orientation; struct resource *res, *mem = NULL; struct resource aperture; void __iomem *screen_base; @@ -320,6 +341,7 @@ static int corebootdrm_probe(struct platform_device *pdev) address = corebootdrm_get_address_fb(dev, size, fb); if (!address) return -EINVAL; + orientation = corebootdrm_get_orientation_fb(dev, fb); sysfb->fb_mode = drm_sysfb_mode(width, height, 0, 0); sysfb->fb_format = format; @@ -375,7 +397,7 @@ static int corebootdrm_probe(struct platform_device *pdev) * DRM mode setting and registration */ - ret = corebootdrm_mode_config_init(cdev); + ret = corebootdrm_mode_config_init(cdev, orientation); if (ret) return ret; diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h index 885da106fee3..5d40ca7a1d89 100644 --- a/include/linux/coreboot.h +++ b/include/linux/coreboot.h @@ -47,6 +47,11 @@ struct lb_cbmem_entry { u32 id; }; +#define LB_FRAMEBUFFER_ORIENTATION_NORMAL 
0 +#define LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP 1 +#define LB_FRAMEBUFFER_ORIENTATION_LEFT_UP 2 +#define LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP 3 + /* Describes framebuffer setup by coreboot */ struct lb_framebuffer { u32 tag; @@ -65,6 +70,7 @@ struct lb_framebuffer { u8 blue_mask_size; u8 reserved_mask_pos; u8 reserved_mask_size; + u8 orientation; }; /* @@ -74,4 +80,11 @@ struct lb_framebuffer { #define LB_FRAMEBUFFER_HAS_LFB(__fb) \ ((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size)) +/* + * True if the coreboot-provided data is large enough to hold information + * on the display orientation. False otherwise. + */ +#define LB_FRAMEBUFFER_HAS_ORIENTATION(__fb) \ + ((__fb)->size >= offsetofend(struct lb_framebuffer, orientation)) + #endif /* _LINUX_COREBOOT_H */ -- cgit v1.2.3 From 77ae37018a2705f5abe8cc428e3496651258901d Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Fri, 6 Feb 2026 09:04:12 +0800 Subject: drm/bridge: synopsys: dw-dp: Set pixel mode by platform data In the implementation and integration of the SoC, the DW DisplayPort hardware block can be configured to work in single, dual, quad pixel mode on different platforms, so make the pixel mode set by plat_data to support the upcoming rk3576 variant.
Signed-off-by: Andy Yan Reviewed-by: Sebastian Reichel Tested-by: Sebastian Reichel Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20260206010421.443605-3-andyshrk@163.com --- drivers/gpu/drm/bridge/synopsys/dw-dp.c | 8 +------- drivers/gpu/drm/rockchip/dw_dp-rockchip.c | 19 +++++++++++++++---- include/drm/bridge/dw_dp.h | 7 +++++++ 3 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c index 432342452484..ccc0d7c85645 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-dp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c @@ -352,12 +352,6 @@ enum { DW_DP_YCBCR420_16BIT, }; -enum { - DW_DP_MP_SINGLE_PIXEL, - DW_DP_MP_DUAL_PIXEL, - DW_DP_MP_QUAD_PIXEL, -}; - enum { DW_DP_SDP_VERTICAL_INTERVAL = BIT(0), DW_DP_SDP_HORIZONTAL_INTERVAL = BIT(1), @@ -1984,7 +1978,7 @@ struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, return ERR_CAST(dp); dp->dev = dev; - dp->pixel_mode = DW_DP_MP_QUAD_PIXEL; + dp->pixel_mode = plat_data->pixel_mode; dp->plat_data.max_link_rate = plat_data->max_link_rate; bridge = &dp->bridge; diff --git a/drivers/gpu/drm/rockchip/dw_dp-rockchip.c b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c index 25ab4e46301e..89d614d53596 100644 --- a/drivers/gpu/drm/rockchip/dw_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c @@ -75,7 +75,7 @@ static const struct drm_encoder_helper_funcs dw_dp_encoder_helper_funcs = { static int dw_dp_rockchip_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); - struct dw_dp_plat_data plat_data; + const struct dw_dp_plat_data *plat_data; struct drm_device *drm_dev = data; struct rockchip_dw_dp *dp; struct drm_encoder *encoder; @@ -89,7 +89,10 @@ static int dw_dp_rockchip_bind(struct device *dev, struct device *master, void * dp->dev = dev; platform_set_drvdata(pdev, dp); - plat_data.max_link_rate = 810000; + 
plat_data = of_device_get_match_data(dev); + if (!plat_data) + return -ENODEV; + encoder = &dp->encoder.encoder; encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev, dev->of_node); rockchip_drm_encoder_set_crtc_endpoint_id(&dp->encoder, dev->of_node, 0, 0); @@ -99,7 +102,7 @@ static int dw_dp_rockchip_bind(struct device *dev, struct device *master, void * return ret; drm_encoder_helper_add(encoder, &dw_dp_encoder_helper_funcs); - dp->base = dw_dp_bind(dev, encoder, &plat_data); + dp->base = dw_dp_bind(dev, encoder, plat_data); if (IS_ERR(dp->base)) { ret = PTR_ERR(dp->base); return ret; @@ -134,8 +137,16 @@ static void dw_dp_remove(struct platform_device *pdev) component_del(dp->dev, &dw_dp_rockchip_component_ops); } +static const struct dw_dp_plat_data rk3588_dp_plat_data = { + .max_link_rate = 810000, + .pixel_mode = DW_DP_MP_QUAD_PIXEL, +}; + static const struct of_device_id dw_dp_of_match[] = { - { .compatible = "rockchip,rk3588-dp", }, + { + .compatible = "rockchip,rk3588-dp", + .data = &rk3588_dp_plat_data, + }, {} }; MODULE_DEVICE_TABLE(of, dw_dp_of_match); diff --git a/include/drm/bridge/dw_dp.h b/include/drm/bridge/dw_dp.h index d05df49fd884..25363541e69d 100644 --- a/include/drm/bridge/dw_dp.h +++ b/include/drm/bridge/dw_dp.h @@ -11,8 +11,15 @@ struct drm_encoder; struct dw_dp; +enum { + DW_DP_MP_SINGLE_PIXEL, + DW_DP_MP_DUAL_PIXEL, + DW_DP_MP_QUAD_PIXEL, +}; + struct dw_dp_plat_data { u32 max_link_rate; + u8 pixel_mode; }; struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, -- cgit v1.2.3 From 5cab6d386bd30c3bb4efceb05b25842a6f144693 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 12 Feb 2026 14:55:29 +0530 Subject: drm/buddy: Add kernel-doc for allocator structures and flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing kernel-doc for GPU buddy allocator flags, gpu_buddy_block, and gpu_buddy. 
The documentation covers block header fields, allocator roots, free trees, and allocation flags such as RANGE, TOPDOWN, CONTIGUOUS, CLEAR, and TRIM_DISABLE. Private members are marked with kernel-doc private markers and documented with regular comments. No functional changes. v2: - Corrected GPU_BUDDY_CLEAR_TREE and GPU_BUDDY_DIRTY_TREE index values (Arun) - Rebased after DRM buddy allocator moved to drivers/gpu/ - Updated commit message v3: - Document reserved bits 8:6 in header layout (Arun) - Fix checkpatch warning Cc: Christian König Cc: Arunpravin Paneer Selvam Suggested-by: Matthew Auld Signed-off-by: Sanjay Yadav Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Arunpravin Paneer Selvam Link: https://patch.msgid.link/20260212092527.718455-5-sanjay.kumar.yadav@intel.com --- include/linux/gpu_buddy.h | 123 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h index 07ac65db6d2e..bf2a42256536 100644 --- a/include/linux/gpu_buddy.h +++ b/include/linux/gpu_buddy.h @@ -12,11 +12,58 @@ #include #include +/** + * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range + * + * When set, allocation is restricted to the range [start, end) specified + * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored + * and allocation can use any free space. + */ #define GPU_BUDDY_RANGE_ALLOCATION BIT(0) + +/** + * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space + * + * Allocate starting from high addresses and working down. Useful for + * separating different allocation types (e.g., kernel vs userspace) + * to reduce fragmentation. + */ #define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) + +/** + * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks + * + * The allocation must be satisfied with a single contiguous block. 
+ * If the requested size cannot be allocated contiguously, the + * allocation fails with -ENOSPC. + */ #define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) + +/** + * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory + * + * Attempt to allocate from the clear tree first. If insufficient clear + * memory is available, falls back to dirty memory. Useful when the + * caller needs zeroed memory and wants to avoid GPU clear operations. + */ #define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) + +/** + * GPU_BUDDY_CLEARED - Mark returned blocks as cleared + * + * Used with gpu_buddy_free_list() to indicate that the memory being + * freed has been cleared (zeroed). The blocks will be placed in the + * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests. + */ #define GPU_BUDDY_CLEARED BIT(4) + +/** + * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming + * + * By default, if an allocation is smaller than the allocated block, + * excess memory is trimmed and returned to the free pool. This flag + * disables trimming, keeping the full power-of-two block size. + */ #define GPU_BUDDY_TRIM_DISABLE BIT(5) enum gpu_buddy_free_tree { @@ -28,7 +75,28 @@ enum gpu_buddy_free_tree { #define for_each_free_tree(tree) \ for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) +/** + * struct gpu_buddy_block - Block within a buddy allocator + * + * Each block in the buddy allocator is represented by this structure. + * Blocks are organized in a binary tree where each parent block can be + * split into two children (left and right buddies). The allocator manages + * blocks at various orders (power-of-2 sizes) from chunk_size up to the + * largest contiguous region. 
+ * + * @private: Private data owned by the allocator user (e.g., driver-specific data) + * @link: List node for user ownership while block is allocated + */ struct gpu_buddy_block { +/* private: */ + /* + * Header bit layout: + * - Bits 63:12: block offset within the address space + * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) + * - Bit 9: clear bit (1 if memory is zeroed) + * - Bits 8:6: reserved + * - Bits 5:0: order (log2 of size relative to chunk_size) + */ #define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) #define GPU_BUDDY_ALLOCATED (1 << 10) @@ -43,7 +111,7 @@ struct gpu_buddy_block { struct gpu_buddy_block *left; struct gpu_buddy_block *right; struct gpu_buddy_block *parent; - +/* public: */ void *private; /* owned by creator */ /* @@ -53,43 +121,58 @@ struct gpu_buddy_block { * gpu_buddy_free* ownership is given back to the mm. */ union { +/* private: */ struct rb_node rb; +/* public: */ struct list_head link; }; - +/* private: */ struct list_head tmp_link; }; /* Order-zero must be at least SZ_4K */ #define GPU_BUDDY_MAX_ORDER (63 - 12) -/* - * Binary Buddy System. +/** + * struct gpu_buddy - GPU binary buddy allocator + * + * The buddy allocator provides efficient power-of-two memory allocation + * with fast allocation and free operations. It is commonly used for GPU + * memory management where allocations can be split into power-of-two + * block sizes. * - * Locking should be handled by the user, a simple mutex around - * gpu_buddy_alloc* and gpu_buddy_free* should suffice. + * Locking should be handled by the user; a simple mutex around + * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() + * should suffice. + * + * @n_roots: Number of root blocks in the roots array. + * @max_order: Maximum block order (log2 of largest block size / chunk_size). + * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. 
+ * @size: Total size of the address space managed by this allocator in bytes. + * @avail: Total free space currently available for allocation in bytes. + * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. + * This is a subset of @avail. */ struct gpu_buddy { - /* Maintain a free list for each order. */ - struct rb_root **free_trees; - +/* private: */ /* - * Maintain explicit binary tree(s) to track the allocation of the - * address space. This gives us a simple way of finding a buddy block - * and performing the potentially recursive merge step when freeing a - * block. Nodes are either allocated or free, in which case they will - * also exist on the respective free list. + * Array of red-black trees for free block management. + * Indexed as free_trees[clear/dirty][order] where: + * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content + * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content + * Each tree holds free blocks of the corresponding order. */ - struct gpu_buddy_block **roots; - + struct rb_root **free_trees; /* - * Anything from here is public, and remains static for the lifetime of - * the mm. Everything above is considered do-not-touch. + * Array of root blocks representing the top-level blocks of the + * binary tree(s). Multiple roots exist when the total size is not + * a power of two, with each root being the largest power-of-two + * that fits in the remaining space. 
*/ + struct gpu_buddy_block **roots; +/* public: */ unsigned int n_roots; unsigned int max_order; - - /* Must be at least SZ_4K */ u64 chunk_size; u64 size; u64 avail; -- cgit v1.2.3 From df8c7892e06efa5df2aa780a338f33a4f666370b Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 12 Feb 2026 14:55:30 +0530 Subject: drm/buddy: Move internal helpers to buddy.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move gpu_buddy_block_state(), gpu_buddy_block_is_allocated(), and gpu_buddy_block_is_split() from gpu_buddy.h to gpu_buddy.c as static functions since they have no external callers. Remove gpu_get_buddy() as it was an unused exported wrapper around the internal __get_buddy(). No functional changes. v2: - Rebased after DRM buddy allocator moved to drivers/gpu/ - Keep gpu_buddy_block_is_free() in header since it's now used by drm_buddy.c - Updated commit message Cc: Christian König Cc: Arunpravin Paneer Selvam Suggested-by: Matthew Auld Signed-off-by: Sanjay Yadav Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Arunpravin Paneer Selvam Link: https://patch.msgid.link/20260212092527.718455-6-sanjay.kumar.yadav@intel.com --- drivers/gpu/buddy.c | 35 ++++++++++++++++++----------------- include/linux/gpu_buddy.h | 25 ++----------------------- 2 files changed, 20 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c index 603c59a2013a..b27761246d4b 100644 --- a/drivers/gpu/buddy.c +++ b/drivers/gpu/buddy.c @@ -14,6 +14,24 @@ static struct kmem_cache *slab_blocks; +static unsigned int +gpu_buddy_block_state(struct gpu_buddy_block *block) +{ + return block->header & GPU_BUDDY_HEADER_STATE; +} + +static bool +gpu_buddy_block_is_allocated(struct gpu_buddy_block *block) +{ + return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED; +} + +static bool +gpu_buddy_block_is_split(struct gpu_buddy_block *block) +{ + return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT; +} + 
static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm, struct gpu_buddy_block *parent, unsigned int order, @@ -449,23 +467,6 @@ static int split_block(struct gpu_buddy *mm, return 0; } -/** - * gpu_get_buddy - get buddy address - * - * @block: GPU buddy block - * - * Returns the corresponding buddy block for @block, or NULL - * if this is a root block and can't be merged further. - * Requires some kind of locking to protect against - * any concurrent allocate and free operations. - */ -struct gpu_buddy_block * -gpu_get_buddy(struct gpu_buddy_block *block) -{ - return __get_buddy(block); -} -EXPORT_SYMBOL(gpu_get_buddy); - /** * gpu_buddy_reset_clear - reset blocks clear state * diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h index bf2a42256536..f1fb6eff604a 100644 --- a/include/linux/gpu_buddy.h +++ b/include/linux/gpu_buddy.h @@ -191,16 +191,10 @@ gpu_buddy_block_order(struct gpu_buddy_block *block) return block->header & GPU_BUDDY_HEADER_ORDER; } -static inline unsigned int -gpu_buddy_block_state(struct gpu_buddy_block *block) -{ - return block->header & GPU_BUDDY_HEADER_STATE; -} - static inline bool -gpu_buddy_block_is_allocated(struct gpu_buddy_block *block) +gpu_buddy_block_is_free(struct gpu_buddy_block *block) { - return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED; + return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE; } static inline bool @@ -209,18 +203,6 @@ gpu_buddy_block_is_clear(struct gpu_buddy_block *block) return block->header & GPU_BUDDY_HEADER_CLEAR; } -static inline bool -gpu_buddy_block_is_free(struct gpu_buddy_block *block) -{ - return gpu_buddy_block_state(block) == GPU_BUDDY_FREE; -} - -static inline bool -gpu_buddy_block_is_split(struct gpu_buddy_block *block) -{ - return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT; -} - static inline u64 gpu_buddy_block_size(struct gpu_buddy *mm, struct gpu_buddy_block *block) @@ -232,9 +214,6 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, 
u64 chunk_size); void gpu_buddy_fini(struct gpu_buddy *mm); -struct gpu_buddy_block * -gpu_get_buddy(struct gpu_buddy_block *block); - int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, u64 start, u64 end, u64 size, u64 min_page_size, -- cgit v1.2.3 From f4cc3ab824d6772a48ca9d9c74ac623b3309985d Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Oct 2025 14:06:05 +0200 Subject: dma-buf: protected fence ops by RCU v8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fence ops of a dma_fence currently need to live as long as the dma_fence is alive. This means that the module which originally issued a dma_fence can't unload unless all fences are freed up. As first step to solve this issue protect the fence ops by RCU. While it is counter intuitive to protect a constant function pointer table by RCU it allows modules to wait for an RCU grace period before they unload, to make sure that nobody is executing their functions any more. This patch has not much functional change, but only adds the RCU handling for the static checker to test. v2: make one of the now duplicated lockdep warnings a comment instead. v3: Add more documentation to ->wait and ->release callback.
v4: fix typo in documentation v5: rebased on drm-tip v6: improve code comments v7: improve commit message and code comments v8: fix sparse rcu warnings Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20260219160822.1529-2-christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 71 +++++++++++++++++++++++---------- drivers/gpu/drm/drm_crtc.c | 2 +- drivers/gpu/drm/scheduler/sched_fence.c | 4 +- include/linux/dma-fence.h | 33 ++++++++++++--- 4 files changed, 80 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 7e8db99186c2..076e6e6c75be 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -522,6 +522,7 @@ EXPORT_SYMBOL(dma_fence_signal); signed long dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) { + const struct dma_fence_ops *ops; signed long ret; if (WARN_ON(timeout < 0)) @@ -533,15 +534,22 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) dma_fence_enable_sw_signaling(fence); - if (trace_dma_fence_wait_start_enabled()) { - rcu_read_lock(); - trace_dma_fence_wait_start(fence); + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + trace_dma_fence_wait_start(fence); + if (ops->wait) { + /* + * Implementing the wait ops is deprecated and not supported for + * issuers of fences who need their lifetime to be independent + * of their module after they signal, so it is ok to use the + * ops outside the RCU protected section. 
+ */ + rcu_read_unlock(); + ret = ops->wait(fence, intr, timeout); + } else { rcu_read_unlock(); - } - if (fence->ops->wait) - ret = fence->ops->wait(fence, intr, timeout); - else ret = dma_fence_default_wait(fence, intr, timeout); + } if (trace_dma_fence_wait_end_enabled()) { rcu_read_lock(); trace_dma_fence_wait_end(fence); @@ -562,6 +570,7 @@ void dma_fence_release(struct kref *kref) { struct dma_fence *fence = container_of(kref, struct dma_fence, refcount); + const struct dma_fence_ops *ops; rcu_read_lock(); trace_dma_fence_destroy(fence); @@ -593,12 +602,12 @@ void dma_fence_release(struct kref *kref) spin_unlock_irqrestore(fence->lock, flags); } - rcu_read_unlock(); - - if (fence->ops->release) - fence->ops->release(fence); + ops = rcu_dereference(fence->ops); + if (ops->release) + ops->release(fence); else dma_fence_free(fence); + rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_release); @@ -617,6 +626,7 @@ EXPORT_SYMBOL(dma_fence_free); static bool __dma_fence_enable_signaling(struct dma_fence *fence) { + const struct dma_fence_ops *ops; bool was_set; lockdep_assert_held(fence->lock); @@ -627,14 +637,18 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) if (dma_fence_test_signaled_flag(fence)) return false; - if (!was_set && fence->ops->enable_signaling) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (!was_set && ops->enable_signaling) { trace_dma_fence_enable_signal(fence); - if (!fence->ops->enable_signaling(fence)) { + if (!ops->enable_signaling(fence)) { + rcu_read_unlock(); dma_fence_signal_locked(fence); return false; } } + rcu_read_unlock(); return true; } @@ -1007,8 +1021,13 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout); */ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { - if (fence->ops->set_deadline && !dma_fence_is_signaled(fence)) - fence->ops->set_deadline(fence, deadline); + const struct dma_fence_ops *ops; + + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->set_deadline 
&& !dma_fence_is_signaled(fence)) + ops->set_deadline(fence, deadline); + rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_set_deadline); @@ -1049,7 +1068,13 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); kref_init(&fence->refcount); - fence->ops = ops; + /* + * While it is counter intuitive to protect a constant function pointer + * table by RCU it allows modules to wait for an RCU grace period + * before they unload, to make sure that nobody is executing their + * functions any more. + */ + RCU_INIT_POINTER(fence->ops, ops); INIT_LIST_HEAD(&fence->cb_list); fence->lock = lock; fence->context = context; @@ -1129,11 +1154,12 @@ EXPORT_SYMBOL(dma_fence_init64); */ const char __rcu *dma_fence_driver_name(struct dma_fence *fence) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "RCU protection is required for safe access to returned string"); + const struct dma_fence_ops *ops; + /* RCU protection is required for safe access to returned string */ + ops = rcu_dereference(fence->ops); if (!dma_fence_test_signaled_flag(fence)) - return (const char __rcu *)fence->ops->get_driver_name(fence); + return (const char __rcu *)ops->get_driver_name(fence); else return (const char __rcu *)"detached-driver"; } @@ -1161,11 +1187,12 @@ EXPORT_SYMBOL(dma_fence_driver_name); */ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "RCU protection is required for safe access to returned string"); + const struct dma_fence_ops *ops; + /* RCU protection is required for safe access to returned string */ + ops = rcu_dereference(fence->ops); if (!dma_fence_test_signaled_flag(fence)) - return (const char __rcu *)fence->ops->get_driver_name(fence); + return (const char __rcu *)ops->get_driver_name(fence); else return (const char __rcu *)"signaled-timeline"; } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 
90684f30a048..960fdc1cc6ba 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -158,7 +158,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops; static struct drm_crtc *fence_to_crtc(struct dma_fence *fence) { - BUG_ON(fence->ops != &drm_crtc_fence_ops); + BUG_ON(rcu_access_pointer(fence->ops) != &drm_crtc_fence_ops); return container_of(fence->lock, struct drm_crtc, fence_lock); } diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 9391d6f0dc01..a27786cb86fb 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -195,10 +195,10 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) { - if (f->ops == &drm_sched_fence_ops_scheduled) + if (rcu_access_pointer(f->ops) == &drm_sched_fence_ops_scheduled) return container_of(f, struct drm_sched_fence, scheduled); - if (f->ops == &drm_sched_fence_ops_finished) + if (rcu_access_pointer(f->ops) == &drm_sched_fence_ops_finished) return container_of(f, struct drm_sched_fence, finished); return NULL; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 9c4d25289239..fa3cfe3e98ac 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -67,7 +67,7 @@ struct seq_file; */ struct dma_fence { spinlock_t *lock; - const struct dma_fence_ops *ops; + const struct dma_fence_ops __rcu *ops; /* * We clear the callback list on kref_put so that by the time we * release the fence it is unused. No one should be adding to the @@ -220,6 +220,10 @@ struct dma_fence_ops { * timed out. Can also return other error values on custom implementations, * which should be treated as if the fence is signaled. For example a hardware * lockup could be reported like that. 
+ * + * Implementing this callback prevents the fence from detaching after + * signaling and so it is necessary for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); @@ -231,6 +235,13 @@ struct dma_fence_ops { * Can be called from irq context. This callback is optional. If it is * NULL, then dma_fence_free() is instead called as the default * implementation. + * + * Implementing this callback prevents the fence from detaching after + * signaling and so it is necessary for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. + * + * If the callback is implemented the memory backing the dma_fence + * object must be freed RCU safe. */ void (*release)(struct dma_fence *fence); @@ -454,13 +465,19 @@ dma_fence_test_signaled_flag(struct dma_fence *fence) static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (dma_fence_test_signaled_flag(fence)) return true; - if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal_locked(fence); return true; } + rcu_read_unlock(); return false; } @@ -484,13 +501,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) static inline bool dma_fence_is_signaled(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (dma_fence_test_signaled_flag(fence)) return true; - if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal(fence); return true; } + rcu_read_unlock(); return false; } @@ -695,7 +718,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops; */ static inline bool dma_fence_is_array(struct dma_fence *fence) { - 
return fence->ops == &dma_fence_array_ops; + return rcu_access_pointer(fence->ops) == &dma_fence_array_ops; } /** @@ -706,7 +729,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) */ static inline bool dma_fence_is_chain(struct dma_fence *fence) { - return fence->ops == &dma_fence_chain_ops; + return rcu_access_pointer(fence->ops) == &dma_fence_chain_ops; } /** -- cgit v1.2.3 From 541c8f2468b933acc5d129e84bd264923675a66e Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 8 Oct 2025 18:12:46 +0200 Subject: dma-buf: detach fence ops on signal v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When neither a release nor a wait backend ops is specified it is possible to let the dma_fence live on independently of the module who issued it. This makes it possible to unload drivers and only wait for all their fences to signal. v2: fix typo in comment v3: fix sparse rcu warnings Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Reviewed-by: Philipp Stanner Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20260219160822.1529-3-christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 18 ++++++++++++++---- include/linux/dma-fence.h | 4 ++-- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 076e6e6c75be..3279d82ffa98 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -362,6 +362,7 @@ void __dma_fence_might_wait(void) void dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp) { + const struct dma_fence_ops *ops; struct dma_fence_cb *cur, *tmp; struct list_head cb_list; @@ -371,6 +372,15 @@ void dma_fence_signal_timestamp_locked(struct dma_fence *fence, &fence->flags))) return; + /* + * When neither a release nor a wait operation is specified set the ops + * pointer to NULL to allow the fence structure to become independent + * from who 
originally issued it. + */ + ops = rcu_dereference_protected(fence->ops, true); + if (!ops->release && !ops->wait) + RCU_INIT_POINTER(fence->ops, NULL); + /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); @@ -537,7 +547,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) rcu_read_lock(); ops = rcu_dereference(fence->ops); trace_dma_fence_wait_start(fence); - if (ops->wait) { + if (ops && ops->wait) { /* * Implementing the wait ops is deprecated and not supported for * issuers of fences who need their lifetime to be independent @@ -603,7 +613,7 @@ void dma_fence_release(struct kref *kref) } ops = rcu_dereference(fence->ops); - if (ops->release) + if (ops && ops->release) ops->release(fence); else dma_fence_free(fence); @@ -639,7 +649,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (!was_set && ops->enable_signaling) { + if (!was_set && ops && ops->enable_signaling) { trace_dma_fence_enable_signal(fence); if (!ops->enable_signaling(fence)) { @@ -1025,7 +1035,7 @@ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (ops->set_deadline && !dma_fence_is_signaled(fence)) + if (ops && ops->set_deadline && !dma_fence_is_signaled(fence)) ops->set_deadline(fence, deadline); rcu_read_unlock(); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index fa3cfe3e98ac..9ff2c4a09cdc 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -472,7 +472,7 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (ops->signaled && ops->signaled(fence)) { + if (ops && ops->signaled && ops->signaled(fence)) { rcu_read_unlock(); dma_fence_signal_locked(fence); return true; @@ -508,7 +508,7 @@ dma_fence_is_signaled(struct dma_fence *fence) rcu_read_lock(); ops = 
rcu_dereference(fence->ops); - if (ops->signaled && ops->signaled(fence)) { + if (ops && ops->signaled && ops->signaled(fence)) { rcu_read_unlock(); dma_fence_signal(fence); return true; -- cgit v1.2.3 From 3e5067931b5df667f5350fafe4410554e228e53e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 9 Oct 2025 10:40:06 +0200 Subject: dma-buf: abstract fence locking v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dma_fence_lock_irqsave() and dma_fence_unlock_irqrestore() wrappers and mechanically apply them everywhere. Just a pre-requisite cleanup for a follow up patch. v2: add some missing i915 bits, add abstraction for lockdep assertion as well v3: one more suggestion by Tvrtko Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260219160822.1529-4-christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 48 +++++++++++++---------------- drivers/dma-buf/st-dma-fence.c | 6 ++-- drivers/dma-buf/sw_sync.c | 14 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 2 +- drivers/gpu/drm/i915/i915_active.c | 19 +++++++----- drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +-- drivers/gpu/drm/scheduler/sched_fence.c | 6 ++-- drivers/gpu/drm/xe/xe_sched_job.c | 4 +-- include/linux/dma-fence.h | 38 +++++++++++++++++++++++ 12 files changed, 96 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3279d82ffa98..698260c49f52 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -366,7 +366,7 @@ void dma_fence_signal_timestamp_locked(struct dma_fence *fence, struct dma_fence_cb *cur, *tmp; struct list_head cb_list; - lockdep_assert_held(fence->lock); + dma_fence_assert_held(fence); if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&fence->flags))) @@ -414,9 +414,9 @@ void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) if (WARN_ON(!fence)) return; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); dma_fence_signal_timestamp_locked(fence, timestamp); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); } EXPORT_SYMBOL(dma_fence_signal_timestamp); @@ -475,9 +475,9 @@ bool dma_fence_check_and_signal(struct dma_fence *fence) unsigned long flags; bool ret; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); ret = dma_fence_check_and_signal_locked(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return ret; } @@ -503,9 +503,9 @@ void dma_fence_signal(struct dma_fence *fence) tmp = dma_fence_begin_signalling(); - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); dma_fence_signal_timestamp_locked(fence, ktime_get()); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); dma_fence_end_signalling(tmp); } @@ -606,10 +606,10 @@ void dma_fence_release(struct kref *kref) * don't leave chains dangling. We set the error flag first * so that the callbacks know this signal is due to an error. 
*/ - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); fence->error = -EDEADLK; dma_fence_signal_locked(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); } ops = rcu_dereference(fence->ops); @@ -639,7 +639,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) const struct dma_fence_ops *ops; bool was_set; - lockdep_assert_held(fence->lock); + dma_fence_assert_held(fence); was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); @@ -675,9 +675,9 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); __dma_fence_enable_signaling(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling); @@ -717,8 +717,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, return -ENOENT; } - spin_lock_irqsave(fence->lock, flags); - + dma_fence_lock_irqsave(fence, flags); if (__dma_fence_enable_signaling(fence)) { cb->func = func; list_add_tail(&cb->node, &fence->cb_list); @@ -726,8 +725,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, INIT_LIST_HEAD(&cb->node); ret = -ENOENT; } - - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return ret; } @@ -750,9 +748,9 @@ int dma_fence_get_status(struct dma_fence *fence) unsigned long flags; int status; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); status = dma_fence_get_status_locked(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return status; } @@ -782,13 +780,11 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) unsigned long flags; bool ret; - spin_lock_irqsave(fence->lock, flags); - + dma_fence_lock_irqsave(fence, 
flags); ret = !list_empty(&cb->node); if (ret) list_del_init(&cb->node); - - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return ret; } @@ -827,7 +823,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) unsigned long flags; signed long ret = timeout ? timeout : 1; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (dma_fence_test_signaled_flag(fence)) goto out; @@ -851,11 +847,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); ret = schedule_timeout(ret); - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } @@ -865,7 +861,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) __set_current_state(TASK_RUNNING); out: - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return ret; } EXPORT_SYMBOL(dma_fence_default_wait); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index 73ed6fd48a13..5d0d9abc6e21 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -410,8 +410,10 @@ struct race_thread { static void __wait_for_callbacks(struct dma_fence *f) { - spin_lock_irq(f->lock); - spin_unlock_irq(f->lock); + unsigned long flags; + + dma_fence_lock_irqsave(f, flags); + dma_fence_unlock_irqrestore(f, flags); } static int thread_signal_callback(void *arg) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 963a72324d16..8df20b0218a9 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -156,12 +156,12 @@ static void timeline_fence_release(struct dma_fence *fence) struct sync_timeline *parent = 
dma_fence_parent(fence); unsigned long flags; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!list_empty(&pt->link)) { list_del(&pt->link); rb_erase(&pt->node, &parent->pt_tree); } - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); sync_timeline_put(parent); dma_fence_free(fence); @@ -179,7 +179,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin struct sync_pt *pt = dma_fence_to_sync_pt(fence); unsigned long flags; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { if (ktime_before(deadline, pt->deadline)) pt->deadline = deadline; @@ -187,7 +187,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin pt->deadline = deadline; __set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags); } - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); } static const struct dma_fence_ops timeline_fence_ops = { @@ -431,13 +431,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a goto put_fence; } - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { ret = -ENOENT; goto unlock; } data.deadline_ns = ktime_to_ns(pt->deadline); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); dma_fence_put(fence); @@ -450,7 +450,7 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return 0; unlock: - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); put_fence: dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 4638a686a84e..7c047f5a1549 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ 
-479,10 +479,10 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!dma_fence_is_signaled_locked(fence)) dma_fence_set_error(fence, -ENODATA); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f2beb980e3c3..8b095087feb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2785,8 +2785,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_unlocked); dma_fence_wait(vm->last_tlb_flush, false); /* Make sure that all fence callbacks have completed */ - spin_lock_irqsave(vm->last_tlb_flush->lock, flags); - spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_lock_irqsave(vm->last_tlb_flush, flags); + dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags); dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 806d62ed61ef..a914ceec90aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -639,7 +639,7 @@ static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) * sure that the dma_fence structure isn't freed up. 
*/ rcu_read_lock(); - lock = vm->last_tlb_flush->lock; + lock = dma_fence_spinlock(vm->last_tlb_flush); rcu_read_unlock(); spin_lock_irqsave(lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index a2b413982ce6..c10ac0ab3bfa 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -148,7 +148,7 @@ __dma_fence_signal__notify(struct dma_fence *fence, { struct dma_fence_cb *cur, *tmp; - lockdep_assert_held(fence->lock); + dma_fence_assert_held(fence); list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 25c46d7b1ea7..cd44cbfb53b5 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -1045,9 +1045,10 @@ __i915_active_fence_set(struct i915_active_fence *active, * nesting rules for the fence->lock; the inner lock is always the * older lock. 
*/ - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (prev) - spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + spin_lock_nested(dma_fence_spinlock(prev), + SINGLE_DEPTH_NESTING); /* * A does the cmpxchg first, and so it sees C or NULL, as before, or @@ -1061,17 +1062,18 @@ __i915_active_fence_set(struct i915_active_fence *active, */ while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) { if (prev) { - spin_unlock(prev->lock); + spin_unlock(dma_fence_spinlock(prev)); dma_fence_put(prev); } - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); prev = i915_active_fence_get(active); GEM_BUG_ON(prev == fence); - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (prev) - spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + spin_lock_nested(dma_fence_spinlock(prev), + SINGLE_DEPTH_NESTING); } /* @@ -1088,10 +1090,11 @@ __i915_active_fence_set(struct i915_active_fence *active, */ if (prev) { __list_del_entry(&active->cb.node); - spin_unlock(prev->lock); /* serialise with prev->cb_list */ + /* serialise with prev->cb_list */ + spin_unlock(dma_fence_spinlock(prev)); } list_add_tail(&active->cb.node, &fence->cb_list); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return prev; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index cb22237ac17d..17c114645d9f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -156,12 +156,13 @@ nouveau_name(struct drm_device *dev) static inline bool nouveau_cli_work_ready(struct dma_fence *fence) { + unsigned long flags; bool ret = true; - spin_lock_irq(fence->lock); + dma_fence_lock_irqsave(fence, flags); if (!dma_fence_is_signaled_locked(fence)) ret = false; - spin_unlock_irq(fence->lock); + dma_fence_unlock_irqrestore(fence, flags); if (ret == true) dma_fence_put(fence); diff --git 
a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index a27786cb86fb..096fe28aa9c9 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -156,19 +156,19 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, struct dma_fence *parent; unsigned long flags; - spin_lock_irqsave(&fence->lock, flags); + dma_fence_lock_irqsave(f, flags); /* If we already have an earlier deadline, keep it: */ if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && ktime_before(fence->deadline, deadline)) { - spin_unlock_irqrestore(&fence->lock, flags); + dma_fence_unlock_irqrestore(f, flags); return; } fence->deadline = deadline; set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags); - spin_unlock_irqrestore(&fence->lock, flags); + dma_fence_unlock_irqrestore(f, flags); /* * smp_load_aquire() to ensure that if we are racing another diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 3927666fe556..ae5b38b2a884 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -190,11 +190,11 @@ static bool xe_fence_set_error(struct dma_fence *fence, int error) unsigned long irq_flags; bool signaled; - spin_lock_irqsave(fence->lock, irq_flags); + dma_fence_lock_irqsave(fence, irq_flags); signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); if (!signaled) dma_fence_set_error(fence, error); - spin_unlock_irqrestore(fence->lock, irq_flags); + dma_fence_unlock_irqrestore(fence, irq_flags); return signaled; } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 9ff2c4a09cdc..85d6eac9fa85 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -377,6 +377,44 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep) } while (1); } +/** + * dma_fence_spinlock - return pointer to the spinlock protecting the fence + * @fence: the fence to get the lock from + * + * Return the 
pointer to the extern lock. + */ +static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) +{ + return fence->lock; +} + +/** + * dma_fence_lock_irqsave - irqsave lock the fence + * @fence: the fence to lock + * @flags: where to store the CPU flags. + * + * Lock the fence, preventing it from changing to the signaled state. + */ +#define dma_fence_lock_irqsave(fence, flags) \ + spin_lock_irqsave(fence->lock, flags) + +/** + * dma_fence_unlock_irqrestore - unlock the fence and irqrestore + * @fence: the fence to unlock + * @flags the CPU flags to restore + * + * Unlock the fence, allowing it to change it's state to signaled again. + */ +#define dma_fence_unlock_irqrestore(fence, flags) \ + spin_unlock_irqrestore(fence->lock, flags) + +/** + * dma_fence_assert_held - lockdep assertion that fence is locked + * @fence: the fence which should be locked + */ +#define dma_fence_assert_held(fence) \ + lockdep_assert_held(dma_fence_spinlock(fence)); + #ifdef CONFIG_LOCKDEP bool dma_fence_begin_signalling(void); void dma_fence_end_signalling(bool cookie); -- cgit v1.2.3 From 1f32f310a13c9fb67a9993ab67f596b3f960206f Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 9 Oct 2025 10:40:06 +0200 Subject: dma-buf: inline spinlock for fence protection v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement per-fence spinlocks, allowing implementations to not give an external spinlock to protect the fence internal state. Instead a spinlock embedded into the fence structure itself is used in this case. Shared spinlocks have the problem that implementations need to guarantee that the lock lives at least as long all fences referencing them. Using a per-fence spinlock allows completely decoupling spinlock producer and consumer life times, simplifying the handling in most use cases. 
v2: improve naming, coverage and function documentation v3: fix one additional locking in the selftests v4: separate out some changes to make the patch smaller, fix one amdgpu crash found by CI systems v5: improve comments Signed-off-by: Christian König Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20260219160822.1529-5-christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 21 ++++++++++++++++----- drivers/dma-buf/sync_debug.h | 2 +- drivers/gpu/drm/drm_crtc.c | 2 +- drivers/gpu/drm/drm_writeback.c | 2 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 3 ++- drivers/gpu/drm/qxl/qxl_release.c | 3 ++- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 3 ++- drivers/gpu/drm/xe/xe_hw_fence.c | 3 ++- include/linux/dma-fence.h | 19 +++++++++++++------ 9 files changed, 40 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 698260c49f52..4ad863d2a52c 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -343,7 +343,6 @@ void __dma_fence_might_wait(void) } #endif - /** * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal @@ -1070,7 +1069,6 @@ static void __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno, unsigned long flags) { - BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); kref_init(&fence->refcount); @@ -1082,10 +1080,15 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, */ RCU_INIT_POINTER(fence->ops, ops); INIT_LIST_HEAD(&fence->cb_list); - fence->lock = lock; fence->context = context; fence->seqno = seqno; fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT); + if (lock) { + fence->extern_lock = lock; + } else { + spin_lock_init(&fence->inline_lock); + fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT); + } fence->error = 0; trace_dma_fence_init(fence); @@ -1095,7 +1098,7 @@ 
__dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, * dma_fence_init - Initialize a custom fence. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence - * @lock: the irqsafe spinlock to use for locking this fence + * @lock: optional irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * @@ -1105,6 +1108,10 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, * * context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). + * + * It is strongly discouraged to provide an external lock because this couples + * lock and fence life time. This is only allowed for legacy use cases when + * multiple fences need to be prevented from signaling out of order. */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, @@ -1118,7 +1125,7 @@ EXPORT_SYMBOL(dma_fence_init); * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence - * @lock: the irqsafe spinlock to use for locking this fence + * @lock: optional irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * @@ -1128,6 +1135,10 @@ EXPORT_SYMBOL(dma_fence_init); * * Context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). + * + * It is strongly discouraged to provide an external lock because this couples + * lock and fence life time. This is only allowed for legacy use cases when + * multiple fences need to be prevented from signaling out of order. 
*/ void dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops, diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 02af347293d0..c49324505b20 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -47,7 +47,7 @@ struct sync_timeline { static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct sync_timeline, lock); + return container_of(fence->extern_lock, struct sync_timeline, lock); } /** diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 960fdc1cc6ba..8d6f721c2c9a 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -159,7 +159,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops; static struct drm_crtc *fence_to_crtc(struct dma_fence *fence) { BUG_ON(rcu_access_pointer(fence->ops) != &drm_crtc_fence_ops); - return container_of(fence->lock, struct drm_crtc, fence_lock); + return container_of(fence->extern_lock, struct drm_crtc, fence_lock); } static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence) diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index 09362cf4f22f..4da5d6094721 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -81,7 +81,7 @@ * From userspace, this property will always read as zero. 
*/ -#define fence_to_wb_connector(x) container_of(x->lock, \ +#define fence_to_wb_connector(x) container_of(x->extern_lock, \ struct drm_writeback_connector, \ fence_lock) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 903d326927ca..edbe9e08ba0f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -41,7 +41,8 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy; static inline struct nouveau_fence_chan * nouveau_fctx(struct nouveau_fence *fence) { - return container_of(fence->base.lock, struct nouveau_fence_chan, lock); + return container_of(fence->base.extern_lock, struct nouveau_fence_chan, + lock); } static bool diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 720d6d57151c..06979d0e8a9f 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -62,7 +62,8 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, struct qxl_device *qdev; unsigned long cur, end = jiffies + timeout; - qdev = container_of(fence->lock, struct qxl_device, release_lock); + qdev = container_of(fence->extern_lock, struct qxl_device, + release_lock); if (!wait_event_timeout(qdev->release_event, (dma_fence_is_signaled(fence) || diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 3469e2c9e706..4ef84ff9b638 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -47,7 +47,8 @@ struct vmw_event_fence_action { static struct vmw_fence_manager * fman_from_fence(struct vmw_fence_obj *fence) { - return container_of(fence->base.lock, struct vmw_fence_manager, lock); + return container_of(fence->base.extern_lock, struct vmw_fence_manager, + lock); } static void vmw_fence_obj_destroy(struct dma_fence *f) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index ae8ed15b64c5..14720623ad00 100644 --- 
a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -124,7 +124,8 @@ static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence); static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence) { - return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); + return container_of(fence->dma.extern_lock, struct xe_hw_fence_irq, + lock); } static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 85d6eac9fa85..3dc93f068bf6 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -34,7 +34,8 @@ struct seq_file; * @ops: dma_fence_ops associated with this fence * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call - * @lock: spin_lock_irqsave used for locking + * @extern_lock: external spin_lock_irqsave used for locking (deprecated) + * @inline_lock: alternative internal spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by * dma_fence_context_alloc() * @seqno: the sequence number of this fence inside the execution context, @@ -49,6 +50,7 @@ struct seq_file; * of the time. * * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized + * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called @@ -66,7 +68,10 @@ struct seq_file; * been completed, or never called at all. 
*/ struct dma_fence { - spinlock_t *lock; + union { + spinlock_t *extern_lock; + spinlock_t inline_lock; + }; const struct dma_fence_ops __rcu *ops; /* * We clear the callback list on kref_put so that by the time we @@ -100,6 +105,7 @@ struct dma_fence { enum dma_fence_flag_bits { DMA_FENCE_FLAG_INITIALIZED_BIT, + DMA_FENCE_FLAG_INLINE_LOCK_BIT, DMA_FENCE_FLAG_SEQNO64_BIT, DMA_FENCE_FLAG_SIGNALED_BIT, DMA_FENCE_FLAG_TIMESTAMP_BIT, @@ -381,11 +387,12 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep) * dma_fence_spinlock - return pointer to the spinlock protecting the fence * @fence: the fence to get the lock from * - * Return the pointer to the extern lock. + * Return either the pointer to the embedded or the external spin lock. */ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) { - return fence->lock; + return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ? + &fence->inline_lock : fence->extern_lock; } /** @@ -396,7 +403,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) * Lock the fence, preventing it from changing to the signaled state. */ #define dma_fence_lock_irqsave(fence, flags) \ - spin_lock_irqsave(fence->lock, flags) + spin_lock_irqsave(dma_fence_spinlock(fence), flags) /** * dma_fence_unlock_irqrestore - unlock the fence and irqrestore @@ -406,7 +413,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) * Unlock the fence, allowing it to change it's state to signaled again. 
*/ #define dma_fence_unlock_irqrestore(fence, flags) \ - spin_unlock_irqrestore(fence->lock, flags) + spin_unlock_irqrestore(dma_fence_spinlock(fence), flags) /** * dma_fence_assert_held - lockdep assertion that fence is locked -- cgit v1.2.3 From 5943243914b9fed8e26edcb9d45421721a5e3576 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 9 Oct 2025 16:18:53 +0200 Subject: dma-buf: use inline lock for the dma-fence-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the inline lock is now the recommended way for dma_fence implementations. So use this approach for the framework's internal fences as well. Also saves about 4 bytes for the external spinlock. Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Reviewed-by: Philipp Stanner Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20260219160822.1529-8-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 5 ++--- include/linux/dma-fence-array.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 37e2c6179d77..cd970eceaefb 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -204,9 +204,8 @@ void dma_fence_array_init(struct dma_fence_array *array, array->num_fences = num_fences; - spin_lock_init(&array->lock); - dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, - context, seqno); + dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, + seqno); init_irq_work(&array->work, irq_dma_fence_array_work); atomic_set(&array->num_pending, signal_on_any ? 
1 : num_fences); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 079b3dec0a16..370b3d2bba37 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -38,7 +38,6 @@ struct dma_fence_array_cb { struct dma_fence_array { struct dma_fence base; - spinlock_t lock; unsigned num_fences; atomic_t num_pending; struct dma_fence **fences; -- cgit v1.2.3 From a408c0ca0c411ca1ead995bdae3112a806c87556 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 9 Oct 2025 16:32:33 +0200 Subject: dma-buf: use inline lock for the dma-fence-chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the inline lock is now the recommended way for dma_fence implementations. So use this approach for the framework's internal fences as well. Also saves about 4 bytes for the external spinlock. Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Reviewed-by: Philipp Stanner Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20260219160822.1529-9-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-chain.c | 3 +-- include/linux/dma-fence-chain.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index a8a90acf4f34..a707792b6025 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -245,7 +245,6 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev); uint64_t context; - spin_lock_init(&chain->lock); rcu_assign_pointer(chain->prev, prev); chain->fence = fence; chain->prev_seqno = 0; @@ -261,7 +260,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, seqno = max(prev->seqno, seqno); } - dma_fence_init64(&chain->base, &dma_fence_chain_ops, &chain->lock, + dma_fence_init64(&chain->base, &dma_fence_chain_ops, NULL, context, seqno); /* diff --git 
a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 5cd3ba53b4a1..df3beadf1515 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -46,7 +46,6 @@ struct dma_fence_chain { */ struct irq_work work; }; - spinlock_t lock; }; -- cgit v1.2.3 From be6d4c9e9d714ebbf358be41332726a0f94b9ffa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 31 Jan 2026 07:34:16 +0200 Subject: dma-buf: Add dma_buf_attach_revocable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some exporters need a flow to synchronously revoke access to the DMA-buf by importers. Once revoke is completed the importer is not permitted to touch the memory otherwise they may get IOMMU faults, AERs, or worse. DMA-buf today defines a revoke flow, for both pinned and dynamic importers, which is broadly: dma_resv_lock(dmabuf->resv, NULL); // Prevent new mappings from being established priv->revoked = true; // Tell all importers to eventually unmap dma_buf_invalidate_mappings(dmabuf); // Wait for any in-progress fences on the old mapping dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); dma_resv_unlock(dmabuf->resv, NULL); // Wait for all importers to complete unmap wait_for_completion(&priv->unmapped_comp); This works well, and an importer that continues to access the DMA-buf after unmapping it is very buggy. However, the final wait for unmap is effectively unbounded. Several importers do not support invalidate_mappings() at all and won't unmap until userspace triggers it. This unbounded wait is not suitable for exporters like VFIO and RDMA that need to issue revoke as part of their normal operations. Add dma_buf_attach_revocable() to allow exporters to determine the difference between importers that can complete the above in bounded time, and those that can't. It can be called inside the exporter's attach op to reject incompatible importers.
Document these details about how dma_buf_invalidate_mappings() works and what the required sequence is to achieve a full revocation. Signed-off-by: Leon Romanovsky Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20260131-dmabuf-revoke-v7-6-463d956bd527@nvidia.com --- drivers/dma-buf/dma-buf.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/dma-buf.h | 9 +++------ 2 files changed, 50 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 3b32f15fbc18..a202a308c079 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1318,13 +1318,59 @@ void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, } EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, "DMA_BUF"); +/** + * dma_buf_attach_revocable - check if a DMA-buf importer implements + * revoke semantics. + * @attach: the DMA-buf attachment to check + * + * Returns true if the DMA-buf importer can support the revoke sequence + * explained in dma_buf_invalidate_mappings() within bounded time. Meaning the + * importer implements invalidate_mappings() and ensures that unmap is called as + * a result. + */ +bool dma_buf_attach_revocable(struct dma_buf_attachment *attach) +{ + return attach->importer_ops && + attach->importer_ops->invalidate_mappings; +} +EXPORT_SYMBOL_NS_GPL(dma_buf_attach_revocable, "DMA_BUF"); + /** * dma_buf_invalidate_mappings - notify attachments that DMA-buf is moving * * @dmabuf: [in] buffer which is moving * * Informs all attachments that they need to destroy and recreate all their - * mappings. + * mappings. If the attachment is dynamic then the dynamic importer is expected + * to invalidate any caches it has of the mapping result and perform a new + * mapping request before allowing HW to do any further DMA. 
+ * + * If the attachment is pinned then this informs the pinned importer that the + * underlying mapping is no longer available. Pinned importers may take this + * as a permanent revocation and never establish new mappings so exporters + * should not trigger it lightly. + * + * Upon return importers may continue to access the DMA-buf memory. The caller + * must do two additional waits to ensure that the memory is no longer being + * accessed: + * 1) Until dma_resv_wait_timeout() retires fences the importer is allowed to + * fully access the memory. + * 2) Until the importer calls unmap it is allowed to speculatively + * read-and-discard the memory. It must not write to the memory. + * + * A caller wishing to use dma_buf_invalidate_mappings() to fully stop access to + * the DMA-buf must wait for both. Dynamic callers can often use just the first. + * + * All importers providing an invalidate_mappings() op must ensure that unmap is + * called within bounded time after the op. + * + * Pinned importers that do not support an invalidate_mappings() op will + * eventually perform unmap when they are done with the buffer, which may be an + * unbounded time from calling this function. dma_buf_attach_revocable() can be + * used to prevent such importers from attaching. + * + * Importers are free to request a new mapping in parallel as this function + * returns. */ void dma_buf_invalidate_mappings(struct dma_buf *dmabuf) { diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index e744b8f9bfad..166933b82e27 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -456,12 +456,8 @@ struct dma_buf_attach_ops { * called with this lock held as well. This makes sure that no mapping * is created concurrently with an ongoing move operation. * - * Mappings stay valid and are not directly affected by this callback. - * But the DMA-buf can now be in a different physical location, so all - * mappings should be destroyed and re-created as soon as possible.
- * - * New mappings can be created after this callback returns, and will - * point to the new location of the DMA-buf. + * See the kdoc for dma_buf_invalidate_mappings() for details on the + * required behavior. */ void (*invalidate_mappings)(struct dma_buf_attachment *attach); }; @@ -579,6 +575,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); void dma_buf_invalidate_mappings(struct dma_buf *dma_buf); +bool dma_buf_attach_revocable(struct dma_buf_attachment *attach); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, -- cgit v1.2.3 From 196b2b95fec447c2c4460f753b277d840633fbef Mon Sep 17 00:00:00 2001 From: Mel Henning Date: Thu, 19 Feb 2026 15:05:54 -0500 Subject: drm/nouveau: Add DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO Add kernel-side support for using the zcull hardware in nvidia gpus. zcull aims to improve memory bandwidth by using an early approximate depth test, similar to hierarchical Z on an AMD card. Add a new ioctl that exposes zcull information that has been read from the hardware. Userspace uses each of these parameters either in a heuristic for determining zcull region parameters or in the calculation of a buffer size. It appears the hardware hasn't changed its structure for these values since FERMI_C (circa 2011), so the assumption is that it won't change on us too quickly, and is therefore reasonable to include in UAPI. This bypasses the nvif layer and instead accesses nvkm_gr directly, which mirrors existing usage of nvkm_gr_units(). There is no nvif object for nvkm_gr yet, and adding one is not trivial. 
Signed-off-by: Mel Henning Link: https://patch.msgid.link/20260219-zcull3-v3-2-dbe6a716f104@darkrefraction.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 29 +++++++++++++++ drivers/gpu/drm/nouveau/nouveau_abi16.h | 1 + drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + include/uapi/drm/nouveau_drm.h | 66 +++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index f9201f2e73a3..7860877d909b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -333,6 +333,35 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) return 0; } +int +nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nvkm_gr *gr = nvxx_gr(drm); + struct drm_nouveau_get_zcull_info *out = data; + + if (gr->has_zcull_info) { + const struct nvkm_gr_zcull_info *i = &gr->zcull_info; + + out->width_align_pixels = i->width_align_pixels; + out->height_align_pixels = i->height_align_pixels; + out->pixel_squares_by_aliquots = i->pixel_squares_by_aliquots; + out->aliquot_total = i->aliquot_total; + out->zcull_region_byte_multiplier = i->zcull_region_byte_multiplier; + out->zcull_region_header_size = i->zcull_region_header_size; + out->zcull_subregion_header_size = i->zcull_subregion_header_size; + out->subregion_count = i->subregion_count; + out->subregion_width_align_pixels = i->subregion_width_align_pixels; + out->subregion_height_align_pixels = i->subregion_height_align_pixels; + out->ctxsw_size = i->ctxsw_size; + out->ctxsw_align = i->ctxsw_align; + + return 0; + } else { + return -ENOTTY; + } +} + int nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) { diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index af6b4e1cefd2..134b3ab58719 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ 
b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -6,6 +6,7 @@ struct drm_device *dev, void *data, struct drm_file *file_priv int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS); +int nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS); int nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS); int nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS); int nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 17c114645d9f..5d8475e4895e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1304,6 +1304,7 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_GET_ZCULL_INFO, nouveau_abi16_ioctl_get_zcull_info, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_RENDER_ALLOW), diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index dd87f8f30793..1fa82fa6af38 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -432,6 +432,69 @@ struct drm_nouveau_exec { __u64 push_ptr; }; +struct drm_nouveau_get_zcull_info { + /** + * @width_align_pixels: required alignment for region widths, in pixels + * (typically #TPC's * 16). + */ + __u32 width_align_pixels; + /** + * @height_align_pixels: required alignment for region heights, in + * pixels (typically 32). + */ + __u32 height_align_pixels; + /** + * @pixel_squares_by_aliquots: the pixel area covered by an aliquot + * (typically #Zcull_banks * 16 * 16). 
+ */ + __u32 pixel_squares_by_aliquots; + /** + * @aliquot_total: the total aliquot pool available in hardware + */ + __u32 aliquot_total; + /** + * @zcull_region_byte_multiplier: the size of an aliquot in bytes, which + * is used for save/restore operations on a region + */ + __u32 zcull_region_byte_multiplier; + /** + * @zcull_region_header_size: the region header size in bytes, which is + * used for save/restore operations on a region + */ + __u32 zcull_region_header_size; + /** + * @zcull_subregion_header_size: the subregion header size in bytes, + * which is used for save/restore operations on a region + */ + __u32 zcull_subregion_header_size; + /** + * @subregion_count: the total number of subregions the hardware + * supports + */ + __u32 subregion_count; + /** + * @subregion_width_align_pixels: required alignment for subregion + * widths, in pixels (typically #TPC's * 16). + */ + __u32 subregion_width_align_pixels; + /** + * @subregion_height_align_pixels: required alignment for subregion + * heights, in pixels + */ + __u32 subregion_height_align_pixels; + + /** + * @ctxsw_size: the size, in bytes, of a zcull context switching region. + * Will be zero if the kernel does not support zcull context switching. 
+ */ + __u32 ctxsw_size; + /** + * @ctxsw_align: the alignment, in bytes, of a zcull context switching + * region + */ + __u32 ctxsw_align; +}; + #define DRM_NOUVEAU_GETPARAM 0x00 #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */ #define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 @@ -445,6 +508,7 @@ struct drm_nouveau_exec { #define DRM_NOUVEAU_VM_INIT 0x10 #define DRM_NOUVEAU_VM_BIND 0x11 #define DRM_NOUVEAU_EXEC 0x12 +#define DRM_NOUVEAU_GET_ZCULL_INFO 0x13 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 @@ -513,6 +577,8 @@ struct drm_nouveau_svm_bind { #define DRM_IOCTL_NOUVEAU_VM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init) #define DRM_IOCTL_NOUVEAU_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind) #define DRM_IOCTL_NOUVEAU_EXEC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec) + +#define DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO DRM_IOR (DRM_COMMAND_BASE + DRM_NOUVEAU_GET_ZCULL_INFO, struct drm_nouveau_get_zcull_info) #if defined(__cplusplus) } #endif -- cgit v1.2.3