diff options
| -rw-r--r-- | drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_bo.c | 17 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_gt.c | 32 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pat.c | 52 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pat.h | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pt_types.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 13 |
7 files changed, 109 insertions, 14 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 93643da57428..24fc64fc832e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -89,6 +89,7 @@ #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) #define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) +#define EN_CMP_1WCOH REG_BIT(15) #define CG_DIS_CNTLBUS REG_BIT(6) #define CCS_AUX_INV XE_REG(0x4208) @@ -101,6 +102,11 @@ #define XE2_LMEM_CFG XE_REG(0x48b0) +#define XE2_GAMWALK_CTRL 0x47e4 +#define XE2_GAMWALK_CTRL_MEDIA XE_REG(XE2_GAMWALK_CTRL + MEDIA_GT_GSI_OFFSET) +#define XE2_GAMWALK_CTRL_3D XE_REG_MCR(XE2_GAMWALK_CTRL) +#define EN_CMP_1WCOH_GW REG_BIT(14) + #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6ab52fa397e3..408c74216fdf 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -29,6 +29,7 @@ #include "xe_gt.h" #include "xe_map.h" #include "xe_migrate.h" +#include "xe_pat.h" #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_pxp.h" @@ -3522,16 +3523,16 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; + /* Check if userspace explicitly requested no compression */ + if (bo->flags & XE_BO_FLAG_NO_COMPRESSION) + return false; + /* - * Compression implies coh_none, therefore we know for sure that WB - * memory can't currently use compression, which is likely one of the - * common cases. - * Additionally, userspace may explicitly request no compression via the - * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also disable - * CCS usage. + * For WB (Write-Back) CPU caching mode, check if the device + * supports WB compression with coherency. 
 */ - if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB || - bo->flags & XE_BO_FLAG_NO_COMPRESSION) + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB && + xe->pat.idx[XE_CACHE_WB_COMPRESSION] == XE_PAT_INVALID_IDX) return false; return true; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 313ce83ab0e5..04dbf995a18b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -140,6 +140,36 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } +static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref; + u32 reg; + + if (IS_SRIOV_VF(xe)) + return; + + if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); + reg |= EN_CMP_1WCOH; + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); + + if (xe_gt_is_media_type(gt)) { + xe_mmio_rmw32(&gt->mmio, XE2_GAMWALK_CTRL_MEDIA, 0, EN_CMP_1WCOH_GW); + } else { + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMWALK_CTRL_3D); + reg |= EN_CMP_1WCOH_GW; + xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg); + } + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + } +} + static void gt_reset_worker(struct work_struct *w); static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, @@ -466,6 +496,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); xe_gt_enable_host_l2_vram(gt); + xe_gt_enable_comp_1wcoh(gt); if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); @@ -745,6 +776,7 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); xe_gt_enable_host_l2_vram(gt); + xe_gt_enable_comp_1wcoh(gt); xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(&gt->reg_sr, gt); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 
2c3375e0250b..14d0dce5190a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -132,9 +132,10 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * in the table. * * Note: There is an implicit assumption in the driver that compression and - * coh_1way+ are mutually exclusive. If this is ever not true then userptr - * and imported dma-buf from external device will have uncleared ccs state. See - * also xe_bo_needs_ccs_pages(). + * coh_1way+ are mutually exclusive for platforms prior to Xe3. Starting + * with Xe3, compression can be combined with coherency. If using compression + * with coherency, userptr and imported dma-buf from external device will + * have uncleared ccs state. See also xe_bo_needs_ccs_pages(). */ #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ { \ @@ -144,8 +145,7 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ - .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \ - XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ + .coh_mode = __coh_mode ? 
XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ .valid = 1 \ } @@ -181,6 +181,38 @@ static const struct xe_pat_table_entry xe2_pat_table[] = { [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), }; +static const struct xe_pat_table_entry xe3_lpg_pat_table[] = { + [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), + [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), + [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), + [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ), + [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ), + [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ), + [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ), + [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ), + [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ), + [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ), + [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ), + [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ), + [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ), + [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ), + [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ), + [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ), + [16] = XE2_PAT( 0, 1, 0, 0, 3, 2 ), + /* 17..19 are reserved; leave set to all 0's */ + [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ), + [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ), + [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ), + [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ), + [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ), + [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ), + [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ), + [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ), + [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ), + [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ), + [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ), + [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), +}; /* Special PAT values programmed outside the main table */ static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 ); @@ -490,6 +522,7 @@ static const struct xe_pat_ops xe3p_xpc_pat_ops = { void xe_pat_init_early(struct xe_device *xe) { + xe->pat.idx[XE_CACHE_WB_COMPRESSION] = XE_PAT_INVALID_IDX; if (GRAPHICS_VERx100(xe) == 3511) { xe->pat.ops = &xe3p_xpc_pat_ops; xe->pat.table = xe3p_xpc_pat_table; @@ -501,7 +534,12 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.idx[XE_CACHE_WB] = 2; } 
else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; - xe->pat.table = xe2_pat_table; + if (GRAPHICS_VER(xe) == 30) { + xe->pat.table = xe3_lpg_pat_table; + xe->pat.idx[XE_CACHE_WB_COMPRESSION] = 16; + } else { + xe->pat.table = xe2_pat_table; + } xe->pat.pat_ats = &xe2_pat_ats; if (IS_DGFX(xe)) xe->pat.pat_pta = &xe2_pat_pta; @@ -658,6 +696,8 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) if (GRAPHICS_VER(xe) >= 20) { drm_printf(p, "IDX[XE_CACHE_NONE_COMPRESSION] = %d\n", xe->pat.idx[XE_CACHE_NONE_COMPRESSION]); + drm_printf(p, "IDX[XE_CACHE_WB_COMPRESSION] = %d\n", + xe->pat.idx[XE_CACHE_WB_COMPRESSION]); } return 0; diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index d5dadfb7f924..c7e2a53d8cee 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -12,6 +12,8 @@ struct drm_printer; struct xe_device; struct xe_gt; +#define XE_PAT_INVALID_IDX U16_MAX + /** * struct xe_pat_table_entry - The pat_index encoding and other meta information. 
*/ diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 88fabf8e2655..84b51d3762a4 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -20,6 +20,7 @@ enum xe_cache_level { XE_CACHE_WT, XE_CACHE_WB, XE_CACHE_NONE_COMPRESSION, /*UC + COH_NONE + COMPRESSION */ + XE_CACHE_WB_COMPRESSION, __XE_CACHE_LEVEL_COUNT, }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2e07e60f47fa..001bc36da5ef 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3405,6 +3405,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; u16 pat_index = (*bind_ops)[i].pat_index; u16 coh_mode; + bool comp_en; if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && (!xe_vm_in_fault_mode(vm) || @@ -3421,6 +3422,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, pat_index = array_index_nospec(pat_index, xe->pat.n_entries); (*bind_ops)[i].pat_index = pat_index; coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + comp_en = xe_pat_index_get_comp_en(xe, pat_index); if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ err = -EINVAL; goto free_bind_ops; @@ -3451,6 +3453,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, comp_en && + op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && !IS_ENABLED(CONFIG_DRM_GPUSVM)) || XE_IOCTL_DBG(xe, obj && @@ -3529,6 +3533,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, u16 pat_index, u32 op, u32 bind_flags) { u16 coh_mode; + bool comp_en; if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) && xe_pat_index_get_comp_en(xe, pat_index))) @@ -3574,6 +3579,14 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device 
*xe, struct xe_bo *bo, return -EINVAL; } + /* + * Ensures that imported buffer objects (dma-bufs) are not mapped + * with a PAT index that enables compression. + */ + comp_en = xe_pat_index_get_comp_en(xe, pat_index); + if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en)) + return -EINVAL; + /* If a BO is protected it can only be mapped if the key is still valid */ if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) |
