diff options
| author | Brian Nguyen <brian3.nguyen@intel.com> | 2025-12-13 00:32:35 +0300 |
|---|---|---|
| committer | Matthew Brost <matthew.brost@intel.com> | 2025-12-13 03:59:10 +0300 |
| commit | 7c52f13b76c531ee2c503baafe52d357cab0c54a (patch) | |
| tree | d70372b1dbb7dbd0844f76bbfe9724825d6b1c87 | |
| parent | 684965d96a918f78c3fbd3ef55444aa9cdd7c5f6 (diff) | |
| download | linux-7c52f13b76c531ee2c503baafe52d357cab0c54a.tar.xz | |
drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim
There are additional hardware-managed L2$ flushes, such as those for
transient display. In those scenarios, page reclamation is
unnecessary, resulting in redundant cacheline flushes, so skip
over the corresponding ranges.
v2:
- Elaborated on reasoning for page reclamation skip based on
Tejas's discussion. (Matthew A, Tejas)
v3:
- Removed MEDIA_IS_ON due to a race condition, resulting in removal of
the relevant registers and values. (Matthew A)
- Moved l3 policy access to xe_pat. (Matthew A)
v4:
- Updated comments based on previous change. (Tejas)
- Move back PAT index macros to xe_pat.c.
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251212213225.3564537-21-brian3.nguyen@intel.com
| -rw-r--r-- | drivers/gpu/drm/xe/xe_page_reclaim.c | 32 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_page_reclaim.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pat.c | 8 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pat.h | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/xe/xe_pt.c | 3 |
5 files changed, 55 insertions, 1 deletion
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c index 0cce5ad2e33b..fd8c33761127 100644 --- a/drivers/gpu/drm/xe/xe_page_reclaim.c +++ b/drivers/gpu/drm/xe/xe_page_reclaim.c @@ -13,8 +13,40 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_pat.h" #include "xe_sa.h" #include "xe_tlb_inval_types.h" +#include "xe_vm.h" + +/** + * xe_page_reclaim_skip() - Decide whether PRL should be skipped for a VMA + * @tile: Tile owning the VMA + * @vma: VMA under consideration + * + * PPC flushing may be handled by HW for specific PAT encodings. + * Skip PPC flushing/Page Reclaim for scenarios below due to redundant + * flushes. + * - pat_index is transient display (1) + * + * Return: true when page reclamation is unnecessary, false otherwise. + */ +bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma) +{ + u8 l3_policy; + + l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index); + + /* + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * Transient display flushes is taken care by HW, l3_policy = 1. + * + * HW will sequence these transient flushes at various sync points so + * any event of page reclamation will hit these sync points before + * page reclamation could execute. 
+ */ + return (l3_policy == XE_L3_POLICY_XD); +} /** * xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h index ded098298d72..a4f58e0ce9b4 100644 --- a/drivers/gpu/drm/xe/xe_page_reclaim.h +++ b/drivers/gpu/drm/xe/xe_page_reclaim.h @@ -18,6 +18,8 @@ struct xe_tlb_inval; struct xe_tlb_inval_fence; +struct xe_tile; +struct xe_vma; struct xe_guc_page_reclaim_entry { u64 qw; @@ -68,6 +70,7 @@ static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl) prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST; } +bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma); struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval, struct xe_page_reclaim_list *prl, struct xe_tlb_inval_fence *fence); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 6f48d34711a6..2c3375e0250b 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -9,6 +9,7 @@ #include <generated/xe_wa_oob.h> +#include "regs/xe_gt_regs.h" #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -231,6 +232,13 @@ bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index) return !!(xe->pat.table[pat_index].value & XE2_COMP_EN); } +u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index) +{ + WARN_ON(pat_index >= xe->pat.n_entries); + + return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value); +} + static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 5749a488d9a9..d5dadfb7f924 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -69,4 +69,14 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index); */ bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index); +#define XE_L3_POLICY_WB 0 /* 
Write-back */ +#define XE_L3_POLICY_XD 1 /* WB - Transient Display */ +#define XE_L3_POLICY_UC 3 /* Uncached */ +/** + * xe_pat_index_get_l3_policy - Extract the L3 policy for the given pat_index. + * @xe: xe device + * @pat_index: The pat_index to query + */ +u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index); + #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 6e01675213c7..6cd78bb2b652 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2030,7 +2030,8 @@ static int unbind_op_prepare(struct xe_tile *tile, xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl); /* Page reclaim may not be needed due to other features, so skip the corresponding VMA */ - pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl)) ? &pt_update_ops->prl : NULL; + pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl) && + !xe_page_reclaim_skip(tile, vma)) ? &pt_update_ops->prl : NULL; err = vma_reserve_fences(tile_to_xe(tile), vma); if (err) |
