author     Linus Torvalds <torvalds@linux-foundation.org>  2020-10-15 20:46:16 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-10-15 20:46:16 +0300
commit     93b694d096cc10994c817730d4d50288f9ae3d66 (patch)
tree       5bd967686d0003f7dbbe1da49f5399cb4a92f074 /drivers/gpu/drm/i915
parent     726eb70e0d34dc4bc4dada71f52bba8ed638431e (diff)
parent     640eee067d9aae0bb98d8706001976ff1affaf00 (diff)
download   linux-93b694d096cc10994c817730d4d50288f9ae3d66.tar.xz
Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Not a major amount of change, the i915 trees got split into display
and gt trees to better facilitate higher level review, and there's a
major refactoring of i915 GEM locking to use more core kernel concepts
(like ww-mutexes). msm gets per-process pagetables, older AMD SI cards
get DC support, nouveau got a bump in displayport support with common
code extraction from i915.
Outside of drm this contains a couple of patches for hexint
moduleparams which you've acked, and a virtio common code tree that
you should also get via it's regular path.
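The GEM locking rework builds on the kernel's generic wound/wait mutexes.
Below is a minimal sketch of the acquire/backoff pattern that API provides
(illustrative only; demo_ww_class, demo_obj and demo_lock_pair are made-up
names, not i915 code):

  #include <linux/ww_mutex.h>

  static DEFINE_WW_CLASS(demo_ww_class);

  struct demo_obj {
          struct ww_mutex lock;
          /* ... object state ... */
  };

  /*
   * Take two objects' locks without a global lock ordering.  If the
   * second lock returns -EDEADLK this context lost the tie-break: drop
   * what we hold, sleep on the contended mutex with ww_mutex_lock_slow(),
   * and retry with that lock already held so forward progress is
   * guaranteed.
   */
  static int demo_lock_pair(struct demo_obj *first, struct demo_obj *second)
  {
          struct ww_acquire_ctx ctx;
          int ret;

          ww_acquire_init(&ctx, &demo_ww_class);

          ret = ww_mutex_lock(&first->lock, &ctx);
          if (ret)
                  goto out_fini;

          while ((ret = ww_mutex_lock(&second->lock, &ctx)) == -EDEADLK) {
                  struct demo_obj *tmp;

                  ww_mutex_unlock(&first->lock);
                  ww_mutex_lock_slow(&second->lock, &ctx);
                  tmp = first;
                  first = second;
                  second = tmp;
          }
          if (ret) {
                  ww_mutex_unlock(&first->lock);
                  goto out_fini;
          }

          ww_acquire_done(&ctx);
          /* ... operate on both objects ... */
          ww_mutex_unlock(&second->lock);
          ww_mutex_unlock(&first->lock);
  out_fini:
          ww_acquire_fini(&ctx);
          return ret;
  }

In the i915 diff below this shows up as the extra acquire-context argument
threaded into calls such as i915_gem_object_lock() and i915_ggtt_pin().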
New driver:
- Cadence MHDP8546 DisplayPort bridge driver
core:
- cross-driver scatterlist cleanups
- devm_drm conversions
- remove drm_dev_init
- devm_drm_dev_alloc conversion
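  As an aside on the devm_drm_dev_alloc conversion, here is a minimal
  sketch of the managed allocation pattern drivers are moving to
  (hypothetical foo_* names, not code from this pull):

    #include <drm/drm_device.h>
    #include <drm/drm_drv.h>
    #include <linux/platform_device.h>

    struct foo_device {
            struct drm_device drm;          /* must be embedded as a member */
            void __iomem *mmio;
    };

    static struct drm_driver foo_drm_driver;        /* fops, name, ... elided */

    static int foo_probe(struct platform_device *pdev)
    {
            struct foo_device *foo;

            /*
             * Allocates foo_device, initializes the embedded drm_device and
             * ties its lifetime to the parent device via devres, replacing
             * the open-coded drm_dev_init()/drm_dev_put() pairs (hence the
             * drm_dev_init removal noted above).
             */
            foo = devm_drm_dev_alloc(&pdev->dev, &foo_drm_driver,
                                     struct foo_device, drm);
            if (IS_ERR(foo))
                    return PTR_ERR(foo);

            return drm_dev_register(&foo->drm, 0);
    }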
ttm:
- lots of refactoring and cleanups
bridges:
- chained bridge support in more drivers
panel:
- misc new panels
scheduler:
- cleanup priority levels
displayport:
- refactor i915 code into helpers for nouveau
i915:
- split into display and GT trees
- WW locking refactoring in GEM
- execbuf2 extension mechanism
- syncobj timeline support
- GEN 12 HOBL display powersaving
- Rocket Lake display additions
- Disable FBC on Tigerlake
- Tigerlake Type-C + DP improvements
- Hotplug interrupt refactoring
amdgpu:
- Sienna Cichlid updates
- Navy Flounder updates
- DCE6 (SI) support for DC
- Plane rotation enabled
- TMZ state info ioctl
- PCIe DPC recovery support
- DC interrupt handling refactor
- OLED panel fixes
amdkfd:
- add SMI events for thermal throttling
- SMI interface events ioctl update
- process eviction counters
radeon:
- move to dma_ for allocations
- expose sclk via sysfs
msm:
- DSI support for sm8150/sm8250
- per-process GPU pagetable support
- Displayport support
mediatek:
- move HDMI phy driver to PHY
- convert mtk-dpi to bridge API
- disable mt2701 tmds
tegra:
- bridge support
exynos:
- misc cleanups
vc4:
- dual display cleanups
ast:
- cleanups
gma500:
- conversion to GPIOd API
hisilicon:
- misc reworks
ingenic:
- clock handling and format improvements
mcde:
- DSI support
mgag200:
- desktop g200 support
mxsfb:
- i.MX7 + i.MX8M
- alpha plane support
panfrost:
- devfreq support
- amlogic SoC support
ps8640:
- EDID from eDP retrieval
tidss:
- AM65xx YUV workaround
virtio:
- virtio-gpu exported resources
rcar-du:
- R8A7742, R8A774E1 and R8A77961 support
- YUV planar format fixes
- non-visible plane handling
- VSP device reference count fix
- Kconfig fix to avoid displaying disabled options in .config"
* tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits)
drm/ingenic: Fix bad revert
drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init
drm/amdgpu: Remove warning for virtual_display
drm/amdgpu: kfd_initialized can be static
drm/amd/pm: setup APU dpm clock table in SMU HW initialization
drm/amdgpu: prevent spurious warning
drm/amdgpu/swsmu: fix ARC build errors
drm/amd/display: Fix OPTC_DATA_FORMAT programming
drm/amd/display: Don't allow pstate if no support in blank
drm/panfrost: increase readl_relaxed_poll_timeout values
MAINTAINERS: Update entry for st7703 driver after the rename
Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached"
drm/amd/display: HDMI remote sink need mode validation for Linux
drm/amd/display: Change to correct unit on audio rate
drm/amd/display: Avoid set zero in the requested clk
drm/amdgpu: align frag_end to covered address space
drm/amdgpu: fix NULL pointer dereference for Renoir
drm/vmwgfx: fix regression in thp code due to ttm init refactor.
drm/amdgpu/swsmu: add interrupt work handler for smu11 parts
drm/amdgpu/swsmu: add interrupt work function
...
Diffstat (limited to 'drivers/gpu/drm/i915')
153 files changed, 6521 insertions, 4115 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index bda4c0e408f8..e5574e506a5c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -234,6 +234,7 @@ i915-y += \ display/intel_ddi.o \ display/intel_dp.o \ display/intel_dp_aux_backlight.o \ + display/intel_dp_hdcp.o \ display/intel_dp_link_training.o \ display/intel_dp_mst.o \ display/intel_dsi.o \ diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index f4053dd6bde9..520715b7d5b5 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1646,6 +1646,7 @@ static const struct drm_encoder_funcs gen11_dsi_encoder_funcs = { }; static const struct drm_connector_funcs gen11_dsi_connector_funcs = { + .detect = intel_panel_detect, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_connector_destroy, diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 630f49b7aa01..86be032bcf96 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -527,8 +527,6 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) intel_atomic_clear_global_state(state); state->dpll_set = state->modeset = false; - state->global_state_changed = false; - state->active_pipes = 0; } struct intel_crtc_state * @@ -542,40 +540,3 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } - -int _intel_atomic_lock_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - int ret; - - ret = drm_modeset_lock(&crtc->base.mutex, - state->base.acquire_ctx); - if (ret) - return ret; - } - - return 0; -} - -int _intel_atomic_serialize_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 11146292b06f..285de07011dc 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -56,8 +56,4 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); -int _intel_atomic_lock_global_state(struct intel_atomic_state *state); - -int _intel_atomic_serialize_global_state(struct intel_atomic_state *state); - #endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ad4aa66fd676..f7de55707746 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -958,13 +958,8 @@ static int glk_force_audio_cdclk_commit(struct intel_atomic_state *state, if (IS_ERR(cdclk_state)) return PTR_ERR(cdclk_state); - cdclk_state->force_min_cdclk_changed = true; cdclk_state->force_min_cdclk = enable ? 
2 * 96000 : 0; - ret = intel_atomic_lock_global_state(&cdclk_state->base); - if (ret) - return ret; - return drm_atomic_commit(&state->base); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index a0a41ec5c341..4716484af62d 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1656,6 +1656,8 @@ static enum port dvo_port_to_port(struct drm_i915_private *dev_priv, [PORT_E] = { DVO_PORT_HDMIE, DVO_PORT_DPE, DVO_PORT_CRT }, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1 }, [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1 }, + [PORT_H] = { DVO_PORT_HDMIH, DVO_PORT_DPH, -1 }, + [PORT_I] = { DVO_PORT_HDMII, DVO_PORT_DPI, -1 }, }; /* * Bspec lists the ports as A, B, C, D - however internally in our @@ -2133,7 +2135,7 @@ void intel_bios_init(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vbt.display_devices); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) { + if (!HAS_DISPLAY(dev_priv)) { drm_dbg_kms(&dev_priv->drm, "Skipping VBT init due to disabled display.\n"); return; @@ -2650,6 +2652,12 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, case DP_AUX_G: aux_ch = AUX_CH_G; break; + case DP_AUX_H: + aux_ch = AUX_CH_H; + break; + case DP_AUX_I: + aux_ch = AUX_CH_I; + break; default: MISSING_CASE(info->alternate_aux_channel); aux_ch = AUX_CH_A; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 91a8161e7c05..cb93f6cf6d37 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2426,7 +2426,6 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa if (!cdclk_state) return NULL; - cdclk_state->force_min_cdclk_changed = false; cdclk_state->pipe = INVALID_PIPE; return &cdclk_state->base; @@ -2501,6 +2500,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; } else if (old_cdclk_state->active_pipes != new_cdclk_state->active_pipes || + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk || intel_cdclk_changed(&old_cdclk_state->logical, &new_cdclk_state->logical)) { ret = intel_atomic_lock_global_state(&new_cdclk_state->base); @@ -2677,7 +2677,7 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_de_write(dev_priv, GMBUSFREQ_VLV, - DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); } static int cnp_rawclk(struct drm_i915_private *dev_priv) @@ -2903,9 +2903,10 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.get_cdclk = i85x_get_cdclk; else if (IS_I845G(dev_priv)) dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; - else { /* 830 */ - drm_WARN(&dev_priv->drm, !IS_I830(dev_priv), - "Unknown platform. Assuming 133 MHz CDCLK\n"); + else if (IS_I830(dev_priv)) + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + + if (drm_WARN(&dev_priv->drm, !dev_priv->display.get_cdclk, + "Unknown platform. 
Assuming 133 MHz CDCLK\n")) dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; - } } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 5731806e4cee..b34eb00fb327 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -17,8 +17,8 @@ struct intel_atomic_state; struct intel_crtc_state; struct intel_cdclk_vals { - u16 refclk; u32 cdclk; + u16 refclk; u8 divider; /* CD2X divider * 2 */ u8 ratio; }; @@ -49,7 +49,6 @@ struct intel_cdclk_state { /* forced minimum cdclk for glk+ audio w/a */ int force_min_cdclk; - bool force_min_cdclk_changed; /* bitmask of active pipes */ u8 active_pipes; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 5b4510ce5693..4934edd51cb0 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -833,6 +833,9 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + if (dev_priv->params.load_detect_test) { wakeref = intel_display_power_get(dev_priv, intel_encoder->power_domain); diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index f22a7645c249..d5db16764619 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -40,12 +40,12 @@ #define GEN12_CSR_MAX_FW_SIZE ICL_CSR_MAX_FW_SIZE -#define RKL_CSR_PATH "i915/rkl_dmc_ver2_01.bin" -#define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 1) +#define RKL_CSR_PATH "i915/rkl_dmc_ver2_02.bin" +#define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 2) MODULE_FIRMWARE(RKL_CSR_PATH); -#define TGL_CSR_PATH "i915/tgl_dmc_ver2_06.bin" -#define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 6) +#define TGL_CSR_PATH "i915/tgl_dmc_ver2_08.bin" +#define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 8) #define TGL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(TGL_CSR_PATH); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a49ff3a1a63c..4d06178cd76c 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -572,13 +572,13 @@ static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_dp[] = { /* NT mV Trans mV db */ { 0xA, 0x33, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ { 0xA, 0x47, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x64, 0x30, 0x00, 0x0F }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ + { 0xC, 0x64, 0x34, 0x00, 0x0B }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 350 900 8.2 */ { 0xA, 0x46, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x64, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ + { 0xC, 0x64, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ { 0xC, 0x61, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x38, 0x00, 0x07 }, /* 600 900 3.5 */ { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; @@ -706,6 +706,42 @@ static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2[] = { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; +static const struct cnl_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x36, 0x00, 0x09 }, /* 350 
500 3.1 */ + { 0xC, 0x60, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ + { 0xC, 0x7F, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ + { 0xC, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x6F, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ + { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ + { 0x6, 0x60, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +}; + +/* + * Cloned the HOBL entry to comply with the voltage and pre-emphasis entries + * that DisplayPort specification requires + */ +static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { + /* VS pre-emp */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 1 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 2 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 3 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 1 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 2 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 1 */ +}; + +static bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table) +{ + return table == tgl_combo_phy_ddi_translations_edp_hbr2_hobl; +} + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) { @@ -1038,27 +1074,74 @@ static const struct cnl_ddi_buf_trans * ehl_get_combo_buf_trans(struct intel_encoder *encoder, int type, int rate, int *n_entries) { - if (type != INTEL_OUTPUT_HDMI && type != INTEL_OUTPUT_EDP) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + switch (type) { + case INTEL_OUTPUT_HDMI: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + case INTEL_OUTPUT_EDP: + if (dev_priv->vbt.edp.low_vswing) { + if (rate > 540000) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; + } + } + /* fall through */ + default: + /* All combo DP and eDP ports that do not support low_vswing */ *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_dp); return ehl_combo_phy_ddi_translations_dp; } - - return icl_get_combo_buf_trans(encoder, type, rate, n_entries); } static const struct cnl_ddi_buf_trans * tgl_get_combo_buf_trans(struct intel_encoder *encoder, int type, int rate, int *n_entries) { - if (type == INTEL_OUTPUT_HDMI || type == INTEL_OUTPUT_EDP) { - return icl_get_combo_buf_trans(encoder, type, rate, n_entries); - } else if (rate > 270000) { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); - return tgl_combo_phy_ddi_translations_dp_hbr2; - } + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + switch (type) { + case INTEL_OUTPUT_HDMI: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + case INTEL_OUTPUT_EDP: + if (dev_priv->vbt.edp.hobl) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!intel_dp->hobl_failed && rate <= 540000) { + /* Same table applies to TGL, RKL and DG1 */ + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_edp_hbr2_hobl); + return tgl_combo_phy_ddi_translations_edp_hbr2_hobl; + } + } + + if (rate > 540000) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else if 
(dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; + } + /* fall through */ + default: + /* All combo DP and eDP ports that do not support low_vswing */ + if (rate > 270000) { + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + *n_entries = ARRAY_SIZE(tgl_uy_combo_phy_ddi_translations_dp_hbr2); + return tgl_uy_combo_phy_ddi_translations_dp_hbr2; + } + + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); + return tgl_combo_phy_ddi_translations_dp_hbr2; + } - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); - return tgl_combo_phy_ddi_translations_dp_hbr; + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); + return tgl_combo_phy_ddi_translations_dp_hbr; + } } static const struct tgl_dkl_phy_ddi_buf_trans * @@ -1738,6 +1821,8 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state ctl = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); + drm_WARN_ON(crtc->base.dev, ctl & TRANS_DDI_HDCP_SIGNALLING); + ctl &= ~TRANS_DDI_FUNC_ENABLE; if (IS_GEN_RANGE(dev_priv, 8, 10)) @@ -1765,12 +1850,12 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state } int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + enum transcoder cpu_transcoder, bool enable) { struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); intel_wakeref_t wakeref; - enum pipe pipe = 0; int ret = 0; u32 tmp; @@ -1779,19 +1864,12 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, if (drm_WARN_ON(dev, !wakeref)) return -ENXIO; - if (drm_WARN_ON(dev, - !intel_encoder->get_hw_state(intel_encoder, &pipe))) { - ret = -EIO; - goto out; - } - - tmp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(pipe)); + tmp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (enable) tmp |= TRANS_DDI_HDCP_SIGNALLING; else tmp &= ~TRANS_DDI_HDCP_SIGNALLING; - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), tmp); -out: + intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), tmp); intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } @@ -2392,6 +2470,15 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, level = n_entries - 1; } + if (type == INTEL_OUTPUT_EDP) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + val = EDP4K2K_MODE_OVRD_EN | EDP4K2K_MODE_OVRD_OPTIMIZED; + intel_dp->hobl_active = is_hobl_buf_trans(ddi_translations); + intel_de_rmw(dev_priv, ICL_PORT_CL_DW10(phy), val, + intel_dp->hobl_active ? 
val : 0); + } + /* Set PORT_TX_DW5 */ val = intel_de_read(dev_priv, ICL_PORT_TX_DW5_LN0(phy)); val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK | @@ -2802,7 +2889,9 @@ hsw_set_signal_levels(struct intel_dp *intel_dp) static u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, enum phy phy) { - if (intel_phy_is_combo(dev_priv, phy)) { + if (IS_ROCKETLAKE(dev_priv)) { + return RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); + } else if (intel_phy_is_combo(dev_priv, phy)) { return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); } else if (intel_phy_is_tc(dev_priv, phy)) { enum tc_port tc_port = intel_port_to_tc(dev_priv, @@ -2829,6 +2918,16 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, (val & icl_dpclka_cfgcr0_clk_off(dev_priv, phy)) == 0); if (intel_phy_is_combo(dev_priv, phy)) { + u32 mask, sel; + + if (IS_ROCKETLAKE(dev_priv)) { + mask = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + sel = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + } else { + mask = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + sel = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + } + /* * Even though this register references DDIs, note that we * want to pass the PHY rather than the port (DDI). For @@ -2839,8 +2938,8 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, * Clock Select chooses the PLL for both DDIA and DDID and * drives port A in all cases." */ - val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - val |= ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + val &= ~mask; + val |= sel; intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val); intel_de_posting_read(dev_priv, ICL_DPCLKA_CFGCR0); } @@ -3371,6 +3470,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_configure_protocol_converter(intel_dp); intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); @@ -3482,19 +3582,17 @@ static void intel_ddi_pre_enable(struct intel_atomic_state *state, intel_ddi_pre_enable_hdmi(state, encoder, crtc_state, conn_state); } else { - struct intel_lspcon *lspcon = - enc_to_intel_lspcon(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); intel_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); - if (lspcon->active) { - struct intel_digital_port *dig_port = - enc_to_dig_port(encoder); + /* FIXME precompute everything properly */ + /* FIXME how do we turn infoframes off again? 
*/ + if (dig_port->lspcon.active && dig_port->dp.has_hdmi_sink) dig_port->set_infoframes(encoder, crtc_state->has_infoframe, crtc_state, conn_state); - } } } @@ -3938,18 +4036,19 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_psr_update(intel_dp, crtc_state, conn_state); intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); - intel_edp_drrs_enable(intel_dp, crtc_state); + intel_edp_drrs_update(intel_dp, crtc_state); intel_panel_update_backlight(state, encoder, crtc_state, conn_state); } -static void intel_ddi_update_pipe(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +void intel_ddi_update_pipe(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && + !intel_encoder_is_mst(encoder)) intel_ddi_update_pipe_dp(state, encoder, crtc_state, conn_state); @@ -4037,8 +4136,7 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) intel_wait_ddi_buf_idle(dev_priv, port); } - dp_tp_ctl = DP_TP_CTL_ENABLE | - DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; + dp_tp_ctl = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1; if (intel_dp->link_mst) dp_tp_ctl |= DP_TP_CTL_MODE_MST; else { @@ -4061,16 +4159,10 @@ static void intel_ddi_set_link_train(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); - enum port port = dp_to_dig_port(intel_dp)->base.port; u32 temp; temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); - if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) - temp |= DP_TP_CTL_SCRAMBLE_DISABLE; - else - temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE; - temp &= ~DP_TP_CTL_LINK_TRAIN_MASK; switch (dp_train_pat & train_pat_mask) { case DP_TRAINING_PATTERN_DISABLE: @@ -4091,9 +4183,6 @@ static void intel_ddi_set_link_train(struct intel_dp *intel_dp, } intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp); - - intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); - intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); } static void intel_ddi_set_idle_link_train(struct intel_dp *intel_dp) @@ -4878,6 +4967,64 @@ intel_ddi_max_lanes(struct intel_digital_port *dig_port) return max_lanes; } +static bool hti_uses_phy(struct drm_i915_private *i915, enum phy phy) +{ + return i915->hti_state & HDPORT_ENABLED && + (i915->hti_state & HDPORT_PHY_USED_DP(phy) || + i915->hti_state & HDPORT_PHY_USED_HDMI(phy)); +} + +static enum hpd_pin tgl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port >= PORT_D) + return HPD_PORT_TC1 + port - PORT_D; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin rkl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (HAS_PCH_TGP(dev_priv)) + return tgl_hpd_pin(dev_priv, port); + + if (port >= PORT_D) + return HPD_PORT_C + port - PORT_D; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin icl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port >= PORT_C) + return HPD_PORT_TC1 + port - PORT_C; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin ehl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port == PORT_D) + return HPD_PORT_A; + + if (HAS_PCH_MCC(dev_priv)) 
+ return icl_hpd_pin(dev_priv, port); + + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin cnl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port == PORT_F) + return HPD_PORT_E; + + return HPD_PORT_A + port - PORT_A; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *dig_port; @@ -4885,6 +5032,18 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) bool init_hdmi, init_dp, init_lspcon = false; enum phy phy = intel_port_to_phy(dev_priv, port); + /* + * On platforms with HTI (aka HDPORT), if it's enabled at boot it may + * have taken over some of the PHYs and made them unavailable to the + * driver. In that case we should skip initializing the corresponding + * outputs. + */ + if (hti_uses_phy(dev_priv, phy)) { + drm_dbg_kms(&dev_priv->drm, "PORT %c / PHY %c reserved by HTI\n", + port_name(port), phy_name(phy)); + return; + } + init_hdmi = intel_bios_port_supports_dvi(dev_priv, port) || intel_bios_port_supports_hdmi(dev_priv, port); init_dp = intel_bios_port_supports_dp(dev_priv, port); @@ -4918,6 +5077,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + mutex_init(&dig_port->hdcp_mutex); + dig_port->num_hdcp_streams = 0; + encoder->hotplug = intel_ddi_hotplug; encoder->compute_output_type = intel_ddi_compute_output_type; encoder->compute_config = intel_ddi_compute_config; @@ -4939,6 +5101,19 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->cloneable = 0; encoder->pipe_mask = ~0; + if (IS_ROCKETLAKE(dev_priv)) + encoder->hpd_pin = rkl_hpd_pin(dev_priv, port); + else if (INTEL_GEN(dev_priv) >= 12) + encoder->hpd_pin = tgl_hpd_pin(dev_priv, port); + else if (IS_ELKHARTLAKE(dev_priv)) + encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); + else if (IS_GEN(dev_priv, 11)) + encoder->hpd_pin = icl_hpd_pin(dev_priv, port); + else if (IS_GEN(dev_priv, 10)) + encoder->hpd_pin = cnl_hpd_pin(dev_priv, port); + else + encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); + if (INTEL_GEN(dev_priv) >= 11) dig_port->saved_port_bits = intel_de_read(dev_priv, DDI_BUF_CTL(port)) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 077e9dbbe367..f5fb62fc9400 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -16,6 +16,7 @@ struct intel_crtc_state; struct intel_dp; struct intel_dpll_hw_state; struct intel_encoder; +enum transcoder; void intel_ddi_fdi_post_disable(struct intel_atomic_state *state, struct intel_encoder *intel_encoder, @@ -43,6 +44,7 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, u32 bxt_signal_levels(struct intel_dp *intel_dp); u32 ddi_signal_levels(struct intel_dp *intel_dp); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + enum transcoder cpu_transcoder, bool enable); void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b18c5ac2934d..631b4338224e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -47,6 +47,7 @@ #include "display/intel_ddi.h" #include "display/intel_dp.h" #include "display/intel_dp_mst.h" +#include "display/intel_dpll_mgr.h" #include 
"display/intel_dsi.h" #include "display/intel_dvo.h" #include "display/intel_gmbus.h" @@ -66,6 +67,7 @@ #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_color.h" +#include "intel_csr.h" #include "intel_display_types.h" #include "intel_dp_link_training.h" #include "intel_fbc.h" @@ -2310,7 +2312,7 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - i915_gem_object_lock(vma->obj); + i915_gem_object_lock(vma->obj, NULL); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); @@ -3450,7 +3452,7 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(vma)) goto err_obj; - if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) + if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) goto err_obj; if (i915_gem_object_is_tiled(obj) && @@ -3761,6 +3763,44 @@ static int glk_max_plane_width(const struct drm_framebuffer *fb, } } +static int icl_min_plane_width(const struct drm_framebuffer *fb) +{ + /* Wa_14011264657, Wa_14011050563: gen11+ */ + switch (fb->format->format) { + case DRM_FORMAT_C8: + return 18; + case DRM_FORMAT_RGB565: + return 10; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XVYU2101010: + case DRM_FORMAT_Y212: + case DRM_FORMAT_Y216: + return 6; + case DRM_FORMAT_NV12: + return 20; + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + return 12; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XVYU12_16161616: + case DRM_FORMAT_XVYU16161616: + return 4; + default: + return 1; + } +} + static int icl_max_plane_width(const struct drm_framebuffer *fb, int color_plane, unsigned int rotation) @@ -3843,29 +3883,31 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) int y = plane_state->uapi.src.y1 >> 16; int w = drm_rect_width(&plane_state->uapi.src) >> 16; int h = drm_rect_height(&plane_state->uapi.src) >> 16; - int max_width; - int max_height; - u32 alignment; - u32 offset; + int max_width, min_width, max_height; + u32 alignment, offset; int aux_plane = intel_main_to_aux_plane(fb, 0); u32 aux_offset = plane_state->color_plane[aux_plane].offset; - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) { max_width = icl_max_plane_width(fb, 0, rotation); - else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + min_width = icl_min_plane_width(fb); + } else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { max_width = glk_max_plane_width(fb, 0, rotation); - else + min_width = 1; + } else { max_width = skl_max_plane_width(fb, 0, rotation); + min_width = 1; + } if (INTEL_GEN(dev_priv) >= 11) max_height = icl_max_plane_height(); else max_height = skl_max_plane_height(); - if (w > max_width || h > max_height) { + if (w > max_width || w < min_width || h > max_height) { drm_dbg_kms(&dev_priv->drm, - "requested Y/RGB source size %dx%d too big (limit %dx%d)\n", - w, h, max_width, max_height); + "requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", + w, h, min_width, max_width, max_height); return -EINVAL; } @@ -7290,6 +7332,10 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_F_LANES; case PORT_G: return 
POWER_DOMAIN_PORT_DDI_G_LANES; + case PORT_H: + return POWER_DOMAIN_PORT_DDI_H_LANES; + case PORT_I: + return POWER_DOMAIN_PORT_DDI_I_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -7315,6 +7361,10 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_F_TBT; case AUX_CH_G: return POWER_DOMAIN_AUX_G_TBT; + case AUX_CH_H: + return POWER_DOMAIN_AUX_H_TBT; + case AUX_CH_I: + return POWER_DOMAIN_AUX_I_TBT; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_C_TBT; @@ -7346,6 +7396,10 @@ intel_legacy_aux_to_power_domain(enum aux_ch aux_ch) return POWER_DOMAIN_AUX_F; case AUX_CH_G: return POWER_DOMAIN_AUX_G; + case AUX_CH_H: + return POWER_DOMAIN_AUX_H; + case AUX_CH_I: + return POWER_DOMAIN_AUX_I; default: MISSING_CASE(aux_ch); return POWER_DOMAIN_AUX_A; @@ -8114,7 +8168,7 @@ static void compute_m_n(unsigned int m, unsigned int n, * which the devices expect also in synchronous clock mode. */ if (constant_n) - *ret_n = 0x8000; + *ret_n = DP_LINK_CONSTANT_N_VALUE; else *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); @@ -10802,9 +10856,18 @@ static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, u32 temp; if (intel_phy_is_combo(dev_priv, phy)) { - temp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0) & - ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - id = temp >> ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + u32 mask, shift; + + if (IS_ROCKETLAKE(dev_priv)) { + mask = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + shift = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + } else { + mask = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + shift = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + } + + temp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0) & mask; + id = temp >> shift; port_dpll_id = ICL_PORT_DPLL_DEFAULT; } else if (intel_phy_is_tc(dev_priv, phy)) { u32 clk_sel = intel_de_read(dev_priv, DDI_CLK_SEL(port)) & DDI_CLK_SEL_MASK; @@ -12760,6 +12823,9 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, } + if (!mode_changed) + intel_psr2_sel_fetch_update(state, crtc); + return 0; } @@ -13418,12 +13484,6 @@ encoder_retry: "hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); - /* - * Make drm_calc_timestamping_constants in - * drm_atomic_helper_update_legacy_modeset_state() happy - */ - pipe_config->uapi.adjusted_mode = pipe_config->hw.adjusted_mode; - return 0; } @@ -14244,7 +14304,6 @@ verify_crtc_state(struct intel_crtc *crtc, struct intel_encoder *encoder; struct intel_crtc_state *pipe_config = old_crtc_state; struct drm_atomic_state *state = old_crtc_state->uapi.state; - bool active; __drm_atomic_helper_crtc_destroy_state(&old_crtc_state->uapi); intel_crtc_free_hw_state(old_crtc_state); @@ -14254,16 +14313,19 @@ verify_crtc_state(struct intel_crtc *crtc, drm_dbg_kms(&dev_priv->drm, "[CRTC:%d:%s]\n", crtc->base.base.id, crtc->base.name); - active = dev_priv->display.get_pipe_config(crtc, pipe_config); + pipe_config->hw.enable = new_crtc_state->hw.enable; + + pipe_config->hw.active = + dev_priv->display.get_pipe_config(crtc, pipe_config); /* we keep both pipes enabled on 830 */ - if (IS_I830(dev_priv)) - active = new_crtc_state->hw.active; + if (IS_I830(dev_priv) && pipe_config->hw.active) + pipe_config->hw.active = new_crtc_state->hw.active; - I915_STATE_WARN(new_crtc_state->hw.active != active, + I915_STATE_WARN(new_crtc_state->hw.active != pipe_config->hw.active, "crtc active state doesn't match with hw state " "(expected %i, found 
%i)\n", - new_crtc_state->hw.active, active); + new_crtc_state->hw.active, pipe_config->hw.active); I915_STATE_WARN(crtc->active != new_crtc_state->hw.active, "transitional active state does not match atomic hw state " @@ -14272,6 +14334,7 @@ verify_crtc_state(struct intel_crtc *crtc, for_each_encoder_on_crtc(dev, &crtc->base, encoder) { enum pipe pipe; + bool active; active = encoder->get_hw_state(encoder, &pipe); I915_STATE_WARN(active != new_crtc_state->hw.active, @@ -14583,16 +14646,8 @@ u8 intel_calc_active_pipes(struct intel_atomic_state *state, static int intel_modeset_checks(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int ret; state->modeset = true; - state->active_pipes = intel_calc_active_pipes(state, dev_priv->active_pipes); - - if (state->active_pipes != dev_priv->active_pipes) { - ret = _intel_atomic_lock_global_state(state); - if (ret) - return ret; - } if (IS_HASWELL(dev_priv)) return hsw_mode_set_planes_workaround(state); @@ -14736,7 +14791,8 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_cdclk_state *new_cdclk_state; + const struct intel_cdclk_state *old_cdclk_state; + const struct intel_cdclk_state *new_cdclk_state; struct intel_plane_state *plane_state; struct intel_bw_state *new_bw_state; struct intel_plane *plane; @@ -14755,9 +14811,11 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, return ret; } + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) + if (new_cdclk_state && + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) *need_cdclk_calc = true; ret = dev_priv->display.bw_calc_min_cdclk(state); @@ -15134,6 +15192,8 @@ static void commit_pipe_config(struct intel_atomic_state *state, if (new_crtc_state->update_pipe) intel_pipe_fastset(old_crtc_state, new_crtc_state); + + intel_psr2_program_trans_man_trk_ctl(new_crtc_state); } if (dev_priv->display.atomic_update_watermarks) @@ -15702,14 +15762,6 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state) plane->frontbuffer_bit); } -static void assert_global_state_locked(struct drm_i915_private *dev_priv) -{ - struct intel_crtc *crtc; - - for_each_intel_crtc(&dev_priv->drm, crtc) - drm_modeset_lock_assert_held(&crtc->base.mutex); -} - static int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, bool nonblock) @@ -15785,12 +15837,6 @@ static int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); - if (state->global_state_changed) { - assert_global_state_locked(dev_priv); - - dev_priv->active_pipes = state->active_pipes; - } - drm_atomic_state_get(&state->base); INIT_WORK(&state->base.commit_work, intel_atomic_commit_work); @@ -16837,7 +16883,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_pps_init(dev_priv); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return; if (IS_ROCKETLAKE(dev_priv)) { @@ -17137,7 +17183,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (!intel_fb->frontbuffer) return -ENOMEM; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); tiling = i915_gem_object_get_tiling(obj); stride = i915_gem_object_get_stride(obj); 
i915_gem_object_unlock(obj); @@ -17823,6 +17869,27 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) { int ret; + if (i915_inject_probe_failure(i915)) + return -ENODEV; + + if (HAS_DISPLAY(i915)) { + ret = drm_vblank_init(&i915->drm, + INTEL_NUM_PIPES(i915)); + if (ret) + return ret; + } + + intel_bios_init(i915); + + ret = intel_vga_register(i915); + if (ret) + goto cleanup_bios; + + /* FIXME: completely on the wrong abstraction layer */ + intel_power_domains_init_hw(i915, false); + + intel_csr_ucode_init(i915); + i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); @@ -17831,15 +17898,15 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) ret = intel_cdclk_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; ret = intel_dbuf_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; ret = intel_bw_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; init_llist_head(&i915->atomic_helper.free_list); INIT_WORK(&i915->atomic_helper.free_work, @@ -17850,10 +17917,19 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) intel_fbc_init(i915); return 0; + +cleanup_vga_client_pw_domain_csr: + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); + intel_vga_unregister(i915); +cleanup_bios: + intel_bios_driver_remove(i915); + + return ret; } -/* part #2: call after irq install */ -int intel_modeset_init(struct drm_i915_private *i915) +/* part #2: call after irq install, but before gem init */ +int intel_modeset_init_nogem(struct drm_i915_private *i915) { struct drm_device *dev = &i915->drm; enum pipe pipe; @@ -17870,7 +17946,7 @@ int intel_modeset_init(struct drm_i915_private *i915) INTEL_NUM_PIPES(i915), INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); - if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + if (HAS_DISPLAY(i915)) { for_each_pipe(i915, pipe) { ret = intel_crtc_init(i915, pipe); if (ret) { @@ -17892,6 +17968,13 @@ int intel_modeset_init(struct drm_i915_private *i915) if (i915->max_cdclk_freq == 0) intel_update_max_cdclk(i915); + /* + * If the platform has HTI, we need to find out whether it has reserved + * any display resources before we create our display outputs. + */ + if (INTEL_INFO(i915)->display.has_hti) + i915->hti_state = intel_de_read(i915, HDPORT_STATE); + /* Just disable it once at startup */ intel_vga_disable(i915); intel_setup_outputs(i915); @@ -17945,6 +18028,30 @@ int intel_modeset_init(struct drm_i915_private *i915) return 0; } +/* part #3: call after gem init */ +int intel_modeset_init(struct drm_i915_private *i915) +{ + int ret; + + intel_overlay_setup(i915); + + if (!HAS_DISPLAY(i915)) + return 0; + + ret = intel_fbdev_init(&i915->drm); + if (ret) + return ret; + + /* Only enable hotplug handling once the fbdev is fully set up. 
*/ + intel_hpd_init(i915); + + intel_init_ipc(i915); + + intel_psr_set_force_mode_changed(i915->psr.dp); + + return 0; +} + void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -18829,6 +18936,18 @@ void intel_modeset_driver_remove_noirq(struct drm_i915_private *i915) intel_fbc_cleanup_cfb(i915); } +/* part #3: call after gem init */ +void intel_modeset_driver_remove_nogem(struct drm_i915_private *i915) +{ + intel_csr_ucode_fini(i915); + + intel_power_domains_driver_remove(i915); + + intel_vga_unregister(i915); + + intel_bios_driver_remove(i915); +} + #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct intel_display_error_state { @@ -18889,7 +19008,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return NULL; error = kzalloc(sizeof(*error), GFP_ATOMIC); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e890c8fb779b..d10b7c8cde3f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -272,8 +272,6 @@ enum dpio_phy { DPIO_PHY2, }; -#define I915_NUM_PHYS_VLV 2 - enum aux_ch { AUX_CH_A, AUX_CH_B, @@ -282,6 +280,8 @@ enum aux_ch { AUX_CH_E, /* ICL+ */ AUX_CH_F, AUX_CH_G, + AUX_CH_H, + AUX_CH_I, }; #define aux_ch_name(a) ((a) + 'A') @@ -629,9 +629,11 @@ intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, /* modesetting */ void intel_modeset_init_hw(struct drm_i915_private *i915); int intel_modeset_init_noirq(struct drm_i915_private *i915); +int intel_modeset_init_nogem(struct drm_i915_private *i915); int intel_modeset_init(struct drm_i915_private *i915); void intel_modeset_driver_remove(struct drm_i915_private *i915); void intel_modeset_driver_remove_noirq(struct drm_i915_private *i915); +void intel_modeset_driver_remove_nogem(struct drm_i915_private *i915); void intel_display_resume(struct drm_device *dev); void intel_init_pch_refclk(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 5a5cfe25085b..0bf31f9a8af5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -417,6 +417,9 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) su_blocks = su_blocks >> PSR2_SU_STATUS_SHIFT(frame); seq_printf(m, "%d\t%d\n", frame, su_blocks); } + + seq_printf(m, "PSR2 selective fetch: %s\n", + enableddisabled(psr->psr2_sel_fetch_enabled)); } unlock: @@ -598,6 +601,11 @@ static void intel_hdcp_info(struct seq_file *m, { bool hdcp_cap, hdcp2_cap; + if (!intel_connector->hdcp.shim) { + seq_puts(m, "No Connector Support"); + goto out; + } + hdcp_cap = intel_hdcp_capable(intel_connector); hdcp2_cap = intel_hdcp2_capable(intel_connector); @@ -609,6 +617,7 @@ static void intel_hdcp_info(struct seq_file *m, if (!hdcp_cap && !hdcp2_cap) seq_puts(m, "None"); +out: seq_puts(m, "\n"); } @@ -617,6 +626,7 @@ static void intel_dp_info(struct seq_file *m, { struct intel_encoder *intel_encoder = intel_attached_encoder(intel_connector); struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); + const struct drm_property_blob *edid = intel_connector->base.edid_blob_ptr; seq_printf(m, "\tDPCD rev: %x\n", 
intel_dp->dpcd[DP_DPCD_REV]); seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio)); @@ -624,11 +634,7 @@ static void intel_dp_info(struct seq_file *m, intel_panel_info(m, &intel_connector->panel); drm_dp_downstream_debug(m, intel_dp->dpcd, intel_dp->downstream_ports, - &intel_dp->aux); - if (intel_connector->hdcp.shim) { - seq_puts(m, "\tHDCP version: "); - intel_hdcp_info(m, intel_connector); - } + edid ? edid->data : NULL, &intel_dp->aux); } static void intel_dp_mst_info(struct seq_file *m, @@ -646,10 +652,6 @@ static void intel_hdmi_info(struct seq_file *m, struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder); seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio)); - if (intel_connector->hdcp.shim) { - seq_puts(m, "\tHDCP version: "); - intel_hdcp_info(m, intel_connector); - } } static void intel_lvds_info(struct seq_file *m, @@ -705,6 +707,9 @@ static void intel_connector_info(struct seq_file *m, break; } + seq_puts(m, "\tHDCP version: "); + intel_hdcp_info(m, intel_connector); + seq_printf(m, "\tmodes:\n"); list_for_each_entry(mode, &connector->modes, head) intel_seq_print_mode(m, 2, mode); @@ -1066,10 +1071,18 @@ static void drrs_status_per_crtc(struct seq_file *m, drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { + bool supported = false; + if (connector->state->crtc != &intel_crtc->base) continue; seq_printf(m, "%s:\n", connector->name); + + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && + drrs->type == SEAMLESS_DRRS_SUPPORT) + supported = true; + + seq_printf(m, "\tDRRS Supported: %s\n", yesno(supported)); } drm_connector_list_iter_end(&conn_iter); @@ -1080,7 +1093,7 @@ static void drrs_status_per_crtc(struct seq_file *m, mutex_lock(&drrs->mutex); /* DRRS Supported */ - seq_puts(m, "\tDRRS Supported: Yes\n"); + seq_puts(m, "\tDRRS Enabled: Yes\n"); /* disable_drrs() will make drrs->dp NULL */ if (!drrs->dp) { @@ -1115,7 +1128,7 @@ static void drrs_status_per_crtc(struct seq_file *m, mutex_unlock(&drrs->mutex); } else { /* DRRS not supported. 
Print the VBT parameter*/ - seq_puts(m, "\tDRRS Supported : No"); + seq_puts(m, "\tDRRS Enabled : No"); } seq_puts(m, "\n"); } @@ -2026,10 +2039,6 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) if (connector->status != connector_status_connected) return -ENODEV; - /* HDCP is supported by connector */ - if (!intel_connector->hdcp.shim) - return -EINVAL; - seq_printf(m, "%s:%d HDCP version: ", connector->name, connector->base.id); intel_hdcp_info(m, intel_connector); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e0fcb89c736b..7277e58b01f1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3927,12 +3927,13 @@ tgl_tc_cold_request(struct drm_i915_private *i915, bool block) int ret; while (1) { - u32 low_val = 0, high_val; + u32 low_val; + u32 high_val = 0; if (block) - high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ; + low_val = TGL_PCODE_EXIT_TCCOLD_DATA_L_BLOCK_REQ; else - high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ; + low_val = TGL_PCODE_EXIT_TCCOLD_DATA_L_UNBLOCK_REQ; /* * Spec states that we should timeout the request after 200us @@ -3951,8 +3952,7 @@ tgl_tc_cold_request(struct drm_i915_private *i915, bool block) if (++tries == 3) break; - if (ret == -EAGAIN) - msleep(1); + msleep(1); } if (ret) @@ -5263,7 +5263,7 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; int config, i; - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0)) + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0)) /* Wa_1409767108: tgl */ table = wa_1409767108_buddy_page_masks; else @@ -5302,6 +5302,12 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* Wa_14011294188:ehl,jsl,tgl,rkl */ + if (INTEL_PCH_TYPE(dev_priv) >= PCH_JSP && + INTEL_PCH_TYPE(dev_priv) < PCH_DG1) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, 0, + PCH_DPMGUNIT_CLOCK_GATE_DISABLE); + /* 1. Enable PCH reset handshake. */ intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e8f809161c75..3d4bf9b6a0a2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -28,6 +28,7 @@ #include <linux/async.h> #include <linux/i2c.h> +#include <linux/pwm.h> #include <linux/sched/clock.h> #include <drm/drm_atomic.h> @@ -223,6 +224,7 @@ struct intel_panel { bool util_pin_active_low; /* bxt+ */ u8 controller; /* bxt+ only */ struct pwm_device *pwm; + struct pwm_state pwm_state; /* DPCD backlight */ u8 pwmgen_bit_count; @@ -314,10 +316,12 @@ struct intel_hdcp_shim { /* Enables HDCP signalling on the port */ int (*toggle_signalling)(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, bool enable); /* Ensures the link is still protected */ - bool (*check_link)(struct intel_digital_port *dig_port); + bool (*check_link)(struct intel_digital_port *dig_port, + struct intel_connector *connector); /* Detects panel's hdcp capability. This is optional for HDMI. 
*/ int (*hdcp_capable)(struct intel_digital_port *dig_port, @@ -479,8 +483,6 @@ struct intel_atomic_state { bool dpll_set, modeset; - u8 active_pipes; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* @@ -491,11 +493,6 @@ struct intel_atomic_state { bool rps_interactive; - /* - * active_pipes - */ - bool global_state_changed; - struct i915_sw_fence commit_ready; struct llist_node freed; @@ -931,6 +928,7 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + bool enable_psr2_sel_fetch; u32 dc3co_exitline; /* @@ -1073,6 +1071,8 @@ struct intel_crtc_state { /* For DSB related info */ struct intel_dsb *dsb; + + u32 psr2_man_track_ctl; }; enum intel_pipe_crc_source { @@ -1272,6 +1272,7 @@ struct intel_dp { u8 sink_count; bool link_mst; bool link_trained; + bool has_hdmi_sink; bool has_audio; bool reset_link_params; u8 dpcd[DP_RECEIVER_CAP_SIZE]; @@ -1373,8 +1374,19 @@ struct intel_dp { /* Displayport compliance testing */ struct intel_dp_compliance compliance; + /* Downstream facing port caps */ + struct { + int min_tmds_clock, max_tmds_clock; + int max_dotclock; + u8 max_bpc; + bool ycbcr_444_to_420; + } dfp; + /* Display stream compression testing */ bool force_dsc_en; + + bool hobl_failed; + bool hobl_active; }; enum lspcon_vendor { @@ -1409,6 +1421,11 @@ struct intel_digital_port { enum phy_fia tc_phy_fia; u8 tc_phy_fia_idx; + /* protects num_hdcp_streams reference count */ + struct mutex hdcp_mutex; + /* the number of pipes using HDCP signalling out of this port */ + unsigned int num_hdcp_streams; + void (*write_infoframe)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, unsigned int type, @@ -1519,6 +1536,18 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) } } +static inline bool intel_encoder_is_mst(struct intel_encoder *encoder) +{ + return encoder->type == INTEL_OUTPUT_DP_MST; +} + +static inline struct intel_dp_mst_encoder * +enc_to_mst(struct intel_encoder *encoder) +{ + return container_of(&encoder->base, struct intel_dp_mst_encoder, + base.base); +} + static inline struct intel_digital_port * enc_to_dig_port(struct intel_encoder *encoder) { @@ -1527,6 +1556,8 @@ enc_to_dig_port(struct intel_encoder *encoder) if (intel_encoder_is_dig_port(intel_encoder)) return container_of(&encoder->base, struct intel_digital_port, base.base); + else if (intel_encoder_is_mst(intel_encoder)) + return enc_to_mst(encoder)->primary; else return NULL; } @@ -1537,13 +1568,6 @@ intel_attached_dig_port(struct intel_connector *connector) return enc_to_dig_port(intel_attached_encoder(connector)); } -static inline struct intel_dp_mst_encoder * -enc_to_mst(struct intel_encoder *encoder) -{ - return container_of(&encoder->base, struct intel_dp_mst_encoder, - base.base); -} - static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->dp; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d6295eb20b63..bf1e9cf1c0f3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -38,7 +38,6 @@ #include <drm/drm_crtc.h> #include <drm/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_hdcp.h> #include <drm/drm_probe_helper.h> #include "i915_debugfs.h" @@ -248,29 +247,6 @@ intel_dp_max_data_rate(int max_link_clock, int max_lanes) return max_link_clock * max_lanes; } -static int -intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = 
dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &dig_port->base; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - int max_dotclk = dev_priv->max_dotclk_freq; - int ds_max_dotclk; - - int type = intel_dp->downstream_ports[0] & DP_DS_PORT_TYPE_MASK; - - if (type != DP_DS_PORT_TYPE_VGA) - return max_dotclk; - - ds_max_dotclk = drm_dp_downstream_max_clock(intel_dp->dpcd, - intel_dp->downstream_ports); - - if (ds_max_dotclk != 0) - max_dotclk = min(max_dotclk, ds_max_dotclk); - - return max_dotclk; -} - static int cnl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -636,6 +612,34 @@ static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, } static enum drm_mode_status +intel_dp_mode_valid_downstream(struct intel_connector *connector, + const struct drm_display_mode *mode, + int target_clock) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + const struct drm_display_info *info = &connector->base.display_info; + int tmds_clock; + + if (intel_dp->dfp.max_dotclock && + target_clock > intel_dp->dfp.max_dotclock) + return MODE_CLOCK_HIGH; + + /* Assume 8bpc for the DP++/HDMI/DVI TMDS clock check */ + tmds_clock = target_clock; + if (drm_mode_is_420_only(info, mode)) + tmds_clock /= 2; + + if (intel_dp->dfp.min_tmds_clock && + tmds_clock < intel_dp->dfp.min_tmds_clock) + return MODE_CLOCK_LOW; + if (intel_dp->dfp.max_tmds_clock && + tmds_clock > intel_dp->dfp.max_tmds_clock) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { @@ -645,15 +649,14 @@ intel_dp_mode_valid(struct drm_connector *connector, struct drm_i915_private *dev_priv = to_i915(connector->dev); int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; - int max_dotclk; + int max_dotclk = dev_priv->max_dotclk_freq; u16 dsc_max_output_bpp = 0; u8 dsc_slice_count = 0; + enum drm_mode_status status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; - max_dotclk = intel_dp_downstream_max_dotclock(intel_dp); - if (intel_dp_is_edp(intel_dp) && fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; @@ -709,6 +712,11 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; + status = intel_dp_mode_valid_downstream(intel_connector, + mode, target_clock); + if (status != MODE_OK) + return status; + return intel_mode_valid_max_plane_size(dev_priv, mode); } @@ -1563,6 +1571,20 @@ intel_dp_aux_header(u8 txbuf[HEADER_SIZE], txbuf[3] = msg->size - 1; } +static u32 intel_dp_aux_xfer_flags(const struct drm_dp_aux_msg *msg) +{ + /* + * If we're trying to send the HDCP Aksv, we need to set a the Aksv + * select bit to inform the hardware to send the Aksv after our header + * since we can't access that data from software. 
+ */ + if ((msg->request & ~DP_AUX_I2C_MOT) == DP_AUX_NATIVE_WRITE && + msg->address == DP_AUX_HDCP_AKSV) + return DP_AUX_CH_CTL_AUX_AKSV_SELECT; + + return 0; +} + static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -1570,6 +1592,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) struct drm_i915_private *i915 = dp_to_i915(intel_dp); u8 txbuf[20], rxbuf[20]; size_t txsize, rxsize; + u32 flags = intel_dp_aux_xfer_flags(msg); int ret; intel_dp_aux_header(txbuf, msg); @@ -1590,7 +1613,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, 0); + rxbuf, rxsize, flags); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1613,7 +1636,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return -E2BIG; ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, 0); + rxbuf, rxsize, flags); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -1954,19 +1977,72 @@ static bool intel_dp_supports_dsc(struct intel_dp *intel_dp, drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd); } -static int intel_dp_compute_bpp(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config) +static bool intel_dp_hdmi_ycbcr420(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + return crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 || + (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444 && + intel_dp->dfp.ycbcr_444_to_420); +} + +static int intel_dp_hdmi_tmds_clock(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, int bpc) +{ + int clock = crtc_state->hw.adjusted_mode.crtc_clock * bpc / 8; + + if (intel_dp_hdmi_ycbcr420(intel_dp, crtc_state)) + clock /= 2; + + return clock; +} + +static bool intel_dp_hdmi_tmds_clock_valid(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, int bpc) +{ + int tmds_clock = intel_dp_hdmi_tmds_clock(intel_dp, crtc_state, bpc); + + if (intel_dp->dfp.min_tmds_clock && + tmds_clock < intel_dp->dfp.min_tmds_clock) + return false; + + if (intel_dp->dfp.max_tmds_clock && + tmds_clock > intel_dp->dfp.max_tmds_clock) + return false; + + return true; +} + +static bool intel_dp_hdmi_deep_color_possible(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int bpc) +{ + + return intel_hdmi_deep_color_possible(crtc_state, bpc, + intel_dp->has_hdmi_sink, + intel_dp_hdmi_ycbcr420(intel_dp, crtc_state)) && + intel_dp_hdmi_tmds_clock_valid(intel_dp, crtc_state, bpc); +} + +static int intel_dp_max_bpp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; - bpp = pipe_config->pipe_bpp; - bpc = drm_dp_downstream_max_bpc(intel_dp->dpcd, intel_dp->downstream_ports); + bpc = crtc_state->pipe_bpp / 3; + + if (intel_dp->dfp.max_bpc) + bpc = min_t(int, bpc, intel_dp->dfp.max_bpc); - if (bpc > 0) - bpp = min(bpp, 3*bpc); + if (intel_dp->dfp.min_tmds_clock) { + for (; bpc >= 10; bpc -= 2) { + if (intel_dp_hdmi_deep_color_possible(intel_dp, crtc_state, bpc)) + break; + } + } + bpp = bpc * 3; if (intel_dp_is_edp(intel_dp)) { /* Get bpp from vbt only for panels that dont have bpp in edid */ if (intel_connector->base.display_info.bpc == 0 && @@ -2288,7 +2364,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, 
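The intel_dp_mode_valid_downstream() and intel_dp_max_bpp() additions above teach the DP code the dotclock, TMDS clock and bpc limits of a DP++ (DP-to-HDMI/DVI) downstream facing port, cached in the new intel_dp->dfp fields. Below is a minimal standalone sketch of that clock math and the 12 -> 10 -> 8 bpc fallback; the struct, helper names and the limits in main() are invented for this note, only the dotclock * bpc / 8 formula (halved for 4:2:0) and the shape of the loop follow the patch. The driver additionally gates deep color on intel_hdmi_deep_color_possible(); that part is omitted here.

#include <stdbool.h>
#include <stdio.h>

struct dfp_limits {
	int min_tmds_clock;	/* kHz, 0 means "no limit reported" */
	int max_tmds_clock;	/* kHz, 0 means "no limit reported" */
	int max_bpc;		/* 0 means "no cap reported" */
};

/* TMDS character clock for RGB/4:4:4 at a given bpc; 4:2:0 halves it. */
static int tmds_clock_khz(int dotclock_khz, int bpc, bool ycbcr420)
{
	int clock = dotclock_khz * bpc / 8;

	return ycbcr420 ? clock / 2 : clock;
}

static bool tmds_clock_valid(const struct dfp_limits *dfp, int clock)
{
	if (dfp->min_tmds_clock && clock < dfp->min_tmds_clock)
		return false;
	if (dfp->max_tmds_clock && clock > dfp->max_tmds_clock)
		return false;

	return true;
}

/* Clamp the pipe bpc to the converter's cap, then step 12 -> 10 -> 8 until
 * the TMDS clock fits; 8 bpc is the unconditional floor. */
static int pick_bpc(const struct dfp_limits *dfp, int pipe_bpc,
		    int dotclock_khz, bool ycbcr420)
{
	int bpc = pipe_bpc;

	if (dfp->max_bpc && bpc > dfp->max_bpc)
		bpc = dfp->max_bpc;

	for (; bpc >= 10; bpc -= 2) {
		if (tmds_clock_valid(dfp, tmds_clock_khz(dotclock_khz, bpc,
							 ycbcr420)))
			break;
	}

	return bpc;
}

int main(void)
{
	/* Single-link-DVI-like converter: 25-165 MHz TMDS, up to 12 bpc. */
	struct dfp_limits dfp = {
		.min_tmds_clock = 25000,
		.max_tmds_clock = 165000,
		.max_bpc = 12,
	};

	/* 1080p60 (148.5 MHz): 12 and 10 bpc overshoot 165 MHz, 8 bpc fits. */
	printf("chosen bpc: %d\n", pick_bpc(&dfp, 12, 148500, false));

	return 0;
}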
limits.max_lane_count = intel_dp_max_lane_count(intel_dp); limits.min_bpp = intel_dp_min_bpp(pipe_config); - limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + limits.max_bpp = intel_dp_max_bpp(intel_dp, pipe_config); if (intel_dp_is_edp(intel_dp)) { /* @@ -2363,10 +2439,16 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!drm_mode_is_420_only(info, adjusted_mode) || - !intel_dp_get_colorimetry_status(intel_dp) || - !connector->ycbcr_420_allowed) + if (!connector->ycbcr_420_allowed) + return 0; + + if (!drm_mode_is_420_only(info, adjusted_mode)) + return 0; + + if (intel_dp->dfp.ycbcr_444_to_420) { + crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; return 0; + } crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; @@ -2575,6 +2657,34 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA); } +static void +intel_dp_drrs_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + int output_bpp, bool constant_n) +{ + struct intel_connector *intel_connector = intel_dp->attached_connector; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + /* + * DRRS and PSR can't be enable together, so giving preference to PSR + * as it allows more power-savings by complete shutting down display, + * so to guarantee this, intel_dp_drrs_compute_config() must be called + * after intel_psr_compute_config(). + */ + if (pipe_config->has_psr) + return; + + if (!intel_connector->panel.downclock_mode || + dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT) + return; + + pipe_config->has_drrs = true; + intel_link_compute_m_n(output_bpp, pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, &pipe_config->dp_m2_n2, + constant_n, pipe_config->fec_enable); +} + int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -2605,7 +2715,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (ret) return ret; - pipe_config->has_drrs = false; if (!intel_dp_port_has_audio(dev_priv, port)) pipe_config->has_audio = false; else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) @@ -2657,21 +2766,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, &pipe_config->dp_m_n, constant_n, pipe_config->fec_enable); - if (intel_connector->panel.downclock_mode != NULL && - dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { - pipe_config->has_drrs = true; - intel_link_compute_m_n(output_bpp, - pipe_config->lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - constant_n, pipe_config->fec_enable); - } - if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); intel_psr_compute_config(intel_dp, pipe_config); + intel_dp_drrs_compute_config(intel_dp, pipe_config, output_bpp, + constant_n); intel_dp_compute_vsc_sdp(intel_dp, pipe_config, conn_state); intel_dp_compute_hdr_metadata_infoframe_sdp(intel_dp, pipe_config, conn_state); @@ -3752,6 +3852,43 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, intel_dp->output_reg); } +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u8 tmp; + + if (intel_dp->dpcd[DP_DPCD_REV] < 0x13) + return; + + if (!drm_dp_is_branch(intel_dp->dpcd)) + return; + + tmp = intel_dp->has_hdmi_sink ? 
+ DP_HDMI_DVI_OUTPUT_CONFIG : 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_0, tmp) != 1) + drm_dbg_kms(&i915->drm, "Failed to set protocol converter HDMI mode to %s\n", + enableddisabled(intel_dp->has_hdmi_sink)); + + tmp = intel_dp->dfp.ycbcr_444_to_420 ? + DP_CONVERSION_TO_YCBCR420_ENABLE : 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) + drm_dbg_kms(&i915->drm, + "Failed to set protocol converter YCbCr 4:2:0 conversion mode to %s\n", + enableddisabled(intel_dp->dfp.ycbcr_444_to_420)); + + tmp = 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_2, tmp) <= 0) + drm_dbg_kms(&i915->drm, + "Failed to set protocol converter YCbCr 4:2:2 conversion mode to %s\n", + enableddisabled(false)); +} + static void intel_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, @@ -3789,6 +3926,7 @@ static void intel_enable_dp(struct intel_atomic_state *state, } intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_configure_protocol_converter(intel_dp); intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); @@ -4449,62 +4587,6 @@ intel_dp_link_down(struct intel_encoder *encoder, } } -static void -intel_dp_extended_receiver_capabilities(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 dpcd_ext[6]; - - /* - * Prior to DP1.3 the bit represented by - * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved. - * if it is set DP_DPCD_REV at 0000h could be at a value less than - * the true capability of the panel. The only way to check is to - * then compare 0000h and 2200h. - */ - if (!(intel_dp->dpcd[DP_TRAINING_AUX_RD_INTERVAL] & - DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT)) - return; - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_DP13_DPCD_REV, - &dpcd_ext, sizeof(dpcd_ext)) != sizeof(dpcd_ext)) { - drm_err(&i915->drm, - "DPCD failed read at extended capabilities\n"); - return; - } - - if (intel_dp->dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) { - drm_dbg_kms(&i915->drm, - "DPCD extended DPCD rev less than base DPCD rev\n"); - return; - } - - if (!memcmp(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext))) - return; - - drm_dbg_kms(&i915->drm, "Base DPCD: %*ph\n", - (int)sizeof(intel_dp->dpcd), intel_dp->dpcd); - - memcpy(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext)); -} - -bool -intel_dp_read_dpcd(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - - if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, - sizeof(intel_dp->dpcd)) < 0) - return false; /* aux transfer failed */ - - intel_dp_extended_receiver_capabilities(intel_dp); - - drm_dbg_kms(&i915->drm, "DPCD: %*ph\n", (int)sizeof(intel_dp->dpcd), - intel_dp->dpcd); - - return intel_dp->dpcd[DP_DPCD_REV] != 0; -} - bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) { u8 dprx = 0; @@ -4563,7 +4645,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) /* this function is meant to be called only once */ drm_WARN_ON(&dev_priv->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); - if (!intel_dp_read_dpcd(intel_dp)) + if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd) != 0) return false; drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, @@ -4634,11 +4716,23 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) return true; } +static bool +intel_dp_has_sink_count(struct intel_dp *intel_dp) +{ + if (!intel_dp->attached_connector) + return false; + + return 
drm_dp_read_sink_count_cap(&intel_dp->attached_connector->base, + intel_dp->dpcd, + &intel_dp->desc); +} static bool intel_dp_get_dpcd(struct intel_dp *intel_dp) { - if (!intel_dp_read_dpcd(intel_dp)) + int ret; + + if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) return false; /* @@ -4653,18 +4747,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp_set_common_rates(intel_dp); } - /* - * Some eDP panels do not set a valid value for sink count, that is why - * it don't care about read it here and in intel_edp_init_dpcd(). - */ - if (!intel_dp_is_edp(intel_dp) && - !drm_dp_has_quirk(&intel_dp->desc, 0, - DP_DPCD_QUIRK_NO_SINK_COUNT)) { - u8 count; - ssize_t r; - - r = drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &count); - if (r < 1) + if (intel_dp_has_sink_count(intel_dp)) { + ret = drm_dp_read_sink_count(&intel_dp->aux); + if (ret < 0) return false; /* @@ -4672,7 +4757,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * a member variable in intel_dp will track any changes * between short pulse interrupts. */ - intel_dp->sink_count = DP_GET_SINK_COUNT(count); + intel_dp->sink_count = ret; /* * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that @@ -4685,32 +4770,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) return false; } - if (!drm_dp_is_branch(intel_dp->dpcd)) - return true; /* native DP sink */ - - if (intel_dp->dpcd[DP_DPCD_REV] == 0x10) - return true; /* no per-port downstream info */ - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_DOWNSTREAM_PORT_0, - intel_dp->downstream_ports, - DP_MAX_DOWNSTREAM_PORTS) < 0) - return false; /* downstream port status fetch failed */ - - return true; -} - -static bool -intel_dp_sink_can_mst(struct intel_dp *intel_dp) -{ - u8 mstm_cap; - - if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) - return false; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1) - return false; - - return mstm_cap & DP_MST_CAP; + return drm_dp_read_downstream_info(&intel_dp->aux, intel_dp->dpcd, + intel_dp->downstream_ports) == 0; } static bool @@ -4720,7 +4781,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp) return i915->params.enable_dp_mst && intel_dp->can_mst && - intel_dp_sink_can_mst(intel_dp); + drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd); } static void @@ -4729,7 +4790,7 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); + bool sink_can_mst = drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd); drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", @@ -5963,9 +6024,8 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) return connector_status_connected; /* If we're HPD-aware, SINK_COUNT changes dynamically */ - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && + if (intel_dp_has_sink_count(intel_dp) && intel_dp->downstream_ports[0] & DP_DS_PORT_HPD) { - return intel_dp->sink_count ? 
connector_status_connected : connector_status_disconnected; } @@ -6106,16 +6166,103 @@ intel_dp_get_edid(struct intel_dp *intel_dp) } static void +intel_dp_update_dfp(struct intel_dp *intel_dp, + const struct edid *edid) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + + intel_dp->dfp.max_bpc = + drm_dp_downstream_max_bpc(intel_dp->dpcd, + intel_dp->downstream_ports, edid); + + intel_dp->dfp.max_dotclock = + drm_dp_downstream_max_dotclock(intel_dp->dpcd, + intel_dp->downstream_ports); + + intel_dp->dfp.min_tmds_clock = + drm_dp_downstream_min_tmds_clock(intel_dp->dpcd, + intel_dp->downstream_ports, + edid); + intel_dp->dfp.max_tmds_clock = + drm_dp_downstream_max_tmds_clock(intel_dp->dpcd, + intel_dp->downstream_ports, + edid); + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] DFP max bpc %d, max dotclock %d, TMDS clock %d-%d\n", + connector->base.base.id, connector->base.name, + intel_dp->dfp.max_bpc, + intel_dp->dfp.max_dotclock, + intel_dp->dfp.min_tmds_clock, + intel_dp->dfp.max_tmds_clock); +} + +static void +intel_dp_update_420(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + bool is_branch, ycbcr_420_passthrough, ycbcr_444_to_420; + + /* No YCbCr output support on gmch platforms */ + if (HAS_GMCH(i915)) + return; + + /* + * ILK doesn't seem capable of DP YCbCr output. The + * displayed image is severly corrupted. SNB+ is fine. + */ + if (IS_GEN(i915, 5)) + return; + + is_branch = drm_dp_is_branch(intel_dp->dpcd); + ycbcr_420_passthrough = + drm_dp_downstream_420_passthrough(intel_dp->dpcd, + intel_dp->downstream_ports); + ycbcr_444_to_420 = + drm_dp_downstream_444_to_420_conversion(intel_dp->dpcd, + intel_dp->downstream_ports); + + if (INTEL_GEN(i915) >= 11) { + /* Prefer 4:2:0 passthrough over 4:4:4->4:2:0 conversion */ + intel_dp->dfp.ycbcr_444_to_420 = + ycbcr_444_to_420 && !ycbcr_420_passthrough; + + connector->base.ycbcr_420_allowed = + !is_branch || ycbcr_444_to_420 || ycbcr_420_passthrough; + } else { + /* 4:4:4->4:2:0 conversion is the only way */ + intel_dp->dfp.ycbcr_444_to_420 = ycbcr_444_to_420; + + connector->base.ycbcr_420_allowed = ycbcr_444_to_420; + } + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] YCbCr 4:2:0 allowed? %s, YCbCr 4:4:4->4:2:0 conversion? 
%s\n", + connector->base.base.id, connector->base.name, + yesno(connector->base.ycbcr_420_allowed), + yesno(intel_dp->dfp.ycbcr_444_to_420)); +} + +static void intel_dp_set_edid(struct intel_dp *intel_dp) { - struct intel_connector *intel_connector = intel_dp->attached_connector; + struct intel_connector *connector = intel_dp->attached_connector; struct edid *edid; intel_dp_unset_edid(intel_dp); edid = intel_dp_get_edid(intel_dp); - intel_connector->detect_edid = edid; + connector->detect_edid = edid; + + intel_dp_update_dfp(intel_dp, edid); + intel_dp_update_420(intel_dp); + + if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { + intel_dp->has_hdmi_sink = drm_detect_hdmi_monitor(edid); + intel_dp->has_audio = drm_detect_monitor_audio(edid); + } - intel_dp->has_audio = drm_detect_monitor_audio(edid); drm_dp_cec_set_edid(&intel_dp->aux, edid); intel_dp->edid_quirks = drm_dp_get_edid_quirks(edid); } @@ -6123,14 +6270,23 @@ intel_dp_set_edid(struct intel_dp *intel_dp) static void intel_dp_unset_edid(struct intel_dp *intel_dp) { - struct intel_connector *intel_connector = intel_dp->attached_connector; + struct intel_connector *connector = intel_dp->attached_connector; drm_dp_cec_unset_edid(&intel_dp->aux); - kfree(intel_connector->detect_edid); - intel_connector->detect_edid = NULL; + kfree(connector->detect_edid); + connector->detect_edid = NULL; + intel_dp->has_hdmi_sink = false; intel_dp->has_audio = false; intel_dp->edid_quirks = 0; + + intel_dp->dfp.max_bpc = 0; + intel_dp->dfp.max_dotclock = 0; + intel_dp->dfp.min_tmds_clock = 0; + intel_dp->dfp.max_tmds_clock = 0; + + intel_dp->dfp.ycbcr_444_to_420 = false; + connector->base.ycbcr_420_allowed = false; } static int @@ -6149,6 +6305,9 @@ intel_dp_detect(struct drm_connector *connector, drm_WARN_ON(&dev_priv->drm, !drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + /* Can't disconnect eDP */ if (intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); @@ -6243,6 +6402,11 @@ out: */ intel_display_power_flush_work(dev_priv); + if (!intel_dp_is_edp(intel_dp)) + drm_dp_set_subconnector_property(connector, + status, + intel_dp->dpcd, + intel_dp->downstream_ports); return status; } @@ -6284,7 +6448,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) } /* if eDP has no EDID, fall back to fixed mode */ - if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) && + if (intel_dp_is_edp(intel_attached_dp(intel_connector)) && intel_connector->panel.fixed_mode) { struct drm_display_mode *mode; @@ -6296,6 +6460,19 @@ static int intel_dp_get_modes(struct drm_connector *connector) } } + if (!edid) { + struct intel_dp *intel_dp = intel_attached_dp(intel_connector); + struct drm_display_mode *mode; + + mode = drm_dp_downstream_mode(connector->dev, + intel_dp->dpcd, + intel_dp->downstream_ports); + if (mode) { + drm_mode_probed_add(connector, mode); + return 1; + } + } + return 0; } @@ -6381,628 +6558,6 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) edp_panel_vdd_off_sync(intel_dp); } -static void intel_dp_hdcp_wait_for_cp_irq(struct intel_hdcp *hdcp, int timeout) -{ - long ret; - -#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) - ret = wait_event_interruptible_timeout(hdcp->cp_irq_queue, C, - msecs_to_jiffies(timeout)); - - if (!ret) - DRM_DEBUG_KMS("Timedout at waiting for CP_IRQ\n"); -} - -static -int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *dig_port, - u8 *an) -{ 
- struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&dig_port->base.base)); - static const struct drm_dp_aux_msg msg = { - .request = DP_AUX_NATIVE_WRITE, - .address = DP_AUX_HDCP_AKSV, - .size = DRM_HDCP_KSV_LEN, - }; - u8 txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; - ssize_t dpcd_ret; - int ret; - - /* Output An first, that's easy */ - dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AN, - an, DRM_HDCP_AN_LEN); - if (dpcd_ret != DRM_HDCP_AN_LEN) { - drm_dbg_kms(&i915->drm, - "Failed to write An over DP/AUX (%zd)\n", - dpcd_ret); - return dpcd_ret >= 0 ? -EIO : dpcd_ret; - } - - /* - * Since Aksv is Oh-So-Secret, we can't access it in software. So in - * order to get it on the wire, we need to create the AUX header as if - * we were writing the data, and then tickle the hardware to output the - * data once the header is sent out. - */ - intel_dp_aux_header(txbuf, &msg); - - ret = intel_dp_aux_xfer(intel_dp, txbuf, HEADER_SIZE + msg.size, - rxbuf, sizeof(rxbuf), - DP_AUX_CH_CTL_AUX_AKSV_SELECT); - if (ret < 0) { - drm_dbg_kms(&i915->drm, - "Write Aksv over DP/AUX failed (%d)\n", ret); - return ret; - } else if (ret == 0) { - drm_dbg_kms(&i915->drm, "Aksv write over DP/AUX was empty\n"); - return -EIO; - } - - reply = (rxbuf[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK; - if (reply != DP_AUX_NATIVE_REPLY_ACK) { - drm_dbg_kms(&i915->drm, - "Aksv write: no DP_AUX_NATIVE_REPLY_ACK %x\n", - reply); - return -EIO; - } - return 0; -} - -static int intel_dp_hdcp_read_bksv(struct intel_digital_port *dig_port, - u8 *bksv) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, - DRM_HDCP_KSV_LEN); - if (ret != DRM_HDCP_KSV_LEN) { - drm_dbg_kms(&i915->drm, - "Read Bksv from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *dig_port, - u8 *bstatus) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - /* - * For some reason the HDMI and DP HDCP specs call this register - * definition by different names. In the HDMI spec, it's called BSTATUS, - * but in DP it's called BINFO. - */ - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BINFO, - bstatus, DRM_HDCP_BSTATUS_LEN); - if (ret != DRM_HDCP_BSTATUS_LEN) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_read_bcaps(struct intel_digital_port *dig_port, - u8 *bcaps) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BCAPS, - bcaps, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bcaps from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? 
-EIO : ret; - } - - return 0; -} - -static -int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, - bool *repeater_present) -{ - ssize_t ret; - u8 bcaps; - - ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); - if (ret) - return ret; - - *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; - return 0; -} - -static -int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *dig_port, - u8 *ri_prime) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, - ri_prime, DRM_HDCP_RI_LEN); - if (ret != DRM_HDCP_RI_LEN) { - drm_dbg_kms(&i915->drm, "Read Ri' from DP/AUX failed (%zd)\n", - ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *dig_port, - bool *ksv_ready) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - u8 bstatus; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, - &bstatus, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - *ksv_ready = bstatus & DP_BSTATUS_READY; - return 0; -} - -static -int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *dig_port, - int num_downstream, u8 *ksv_fifo) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - int i; - - /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ - for (i = 0; i < num_downstream; i += 3) { - size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_AUX_HDCP_KSV_FIFO, - ksv_fifo + i * DRM_HDCP_KSV_LEN, - len); - if (ret != len) { - drm_dbg_kms(&i915->drm, - "Read ksv[%d] from DP/AUX failed (%zd)\n", - i, ret); - return ret >= 0 ? -EIO : ret; - } - } - return 0; -} - -static -int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, - int i, u32 *part) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) - return -EINVAL; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_AUX_HDCP_V_PRIME(i), part, - DRM_HDCP_V_PRIME_PART_LEN); - if (ret != DRM_HDCP_V_PRIME_PART_LEN) { - drm_dbg_kms(&i915->drm, - "Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); - return ret >= 0 ? 
-EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *dig_port, - bool enable) -{ - /* Not used for single stream DisplayPort setups */ - return 0; -} - -static -bool intel_dp_hdcp_check_link(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - u8 bstatus; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, - &bstatus, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return false; - } - - return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); -} - -static -int intel_dp_hdcp_capable(struct intel_digital_port *dig_port, - bool *hdcp_capable) -{ - ssize_t ret; - u8 bcaps; - - ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); - if (ret) - return ret; - - *hdcp_capable = bcaps & DP_BCAPS_HDCP_CAPABLE; - return 0; -} - -struct hdcp2_dp_errata_stream_type { - u8 msg_id; - u8 stream_type; -} __packed; - -struct hdcp2_dp_msg_data { - u8 msg_id; - u32 offset; - bool msg_detectable; - u32 timeout; - u32 timeout2; /* Added for non_paired situation */ -}; - -static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { - { HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0 }, - { HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, - false, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, - { HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, - false, 0, 0 }, - { HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, - false, 0, 0 }, - { HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, - true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, - { HDCP_2_2_AKE_SEND_PAIRING_INFO, - DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, - HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, - { HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0 }, - { HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, - false, HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0 }, - { HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_SEND_RECVID_LIST, - DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, - HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_STREAM_MANAGE, - DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, - false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, -/* local define to shovel this through the write_2_2 interface */ -#define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 - { HDCP_2_2_ERRATA_DP_STREAM_TYPE, - DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, - 0, 0 }, -}; - -static int -intel_dp_hdcp2_read_rx_status(struct intel_digital_port *dig_port, - u8 *rx_status) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RXSTATUS_OFFSET, rx_status, - HDCP_2_2_DP_RXSTATUS_LEN); - if (ret != HDCP_2_2_DP_RXSTATUS_LEN) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? 
-EIO : ret; - } - - return 0; -} - -static -int hdcp2_detect_msg_availability(struct intel_digital_port *dig_port, - u8 msg_id, bool *msg_ready) -{ - u8 rx_status; - int ret; - - *msg_ready = false; - ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); - if (ret < 0) - return ret; - - switch (msg_id) { - case HDCP_2_2_AKE_SEND_HPRIME: - if (HDCP_2_2_DP_RXSTATUS_H_PRIME(rx_status)) - *msg_ready = true; - break; - case HDCP_2_2_AKE_SEND_PAIRING_INFO: - if (HDCP_2_2_DP_RXSTATUS_PAIRING(rx_status)) - *msg_ready = true; - break; - case HDCP_2_2_REP_SEND_RECVID_LIST: - if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) - *msg_ready = true; - break; - default: - DRM_ERROR("Unidentified msg_id: %d\n", msg_id); - return -EINVAL; - } - - return 0; -} - -static ssize_t -intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, - const struct hdcp2_dp_msg_data *hdcp2_msg_data) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_dp *dp = &dig_port->dp; - struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; - u8 msg_id = hdcp2_msg_data->msg_id; - int ret, timeout; - bool msg_ready = false; - - if (msg_id == HDCP_2_2_AKE_SEND_HPRIME && !hdcp->is_paired) - timeout = hdcp2_msg_data->timeout2; - else - timeout = hdcp2_msg_data->timeout; - - /* - * There is no way to detect the CERT, LPRIME and STREAM_READY - * availability. So Wait for timeout and read the msg. - */ - if (!hdcp2_msg_data->msg_detectable) { - mdelay(timeout); - ret = 0; - } else { - /* - * As we want to check the msg availability at timeout, Ignoring - * the timeout at wait for CP_IRQ. - */ - intel_dp_hdcp_wait_for_cp_irq(hdcp, timeout); - ret = hdcp2_detect_msg_availability(dig_port, - msg_id, &msg_ready); - if (!msg_ready) - ret = -ETIMEDOUT; - } - - if (ret) - drm_dbg_kms(&i915->drm, - "msg_id %d, ret %d, timeout(mSec): %d\n", - hdcp2_msg_data->msg_id, ret, timeout); - - return ret; -} - -static const struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(hdcp2_dp_msg_data); i++) - if (hdcp2_dp_msg_data[i].msg_id == msg_id) - return &hdcp2_dp_msg_data[i]; - - return NULL; -} - -static -int intel_dp_hdcp2_write_msg(struct intel_digital_port *dig_port, - void *buf, size_t size) -{ - struct intel_dp *dp = &dig_port->dp; - struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; - unsigned int offset; - u8 *byte = buf; - ssize_t ret, bytes_to_write, len; - const struct hdcp2_dp_msg_data *hdcp2_msg_data; - - hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); - if (!hdcp2_msg_data) - return -EINVAL; - - offset = hdcp2_msg_data->offset; - - /* No msg_id in DP HDCP2.2 msgs */ - bytes_to_write = size - 1; - byte++; - - hdcp->cp_irq_count_cached = atomic_read(&hdcp->cp_irq_count); - - while (bytes_to_write) { - len = bytes_to_write > DP_AUX_MAX_PAYLOAD_BYTES ? - DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_write; - - ret = drm_dp_dpcd_write(&dig_port->dp.aux, - offset, (void *)byte, len); - if (ret < 0) - return ret; - - bytes_to_write -= ret; - byte += ret; - offset += ret; - } - - return size; -} - -static -ssize_t get_receiver_id_list_size(struct intel_digital_port *dig_port) -{ - u8 rx_info[HDCP_2_2_RXINFO_LEN]; - u32 dev_cnt; - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RXINFO_OFFSET, - (void *)rx_info, HDCP_2_2_RXINFO_LEN); - if (ret != HDCP_2_2_RXINFO_LEN) - return ret >= 0 ? 
-EIO : ret; - - dev_cnt = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4 | - HDCP_2_2_DEV_COUNT_LO(rx_info[1])); - - if (dev_cnt > HDCP_2_2_MAX_DEVICE_COUNT) - dev_cnt = HDCP_2_2_MAX_DEVICE_COUNT; - - ret = sizeof(struct hdcp2_rep_send_receiverid_list) - - HDCP_2_2_RECEIVER_IDS_MAX_LEN + - (dev_cnt * HDCP_2_2_RECEIVER_ID_LEN); - - return ret; -} - -static -int intel_dp_hdcp2_read_msg(struct intel_digital_port *dig_port, - u8 msg_id, void *buf, size_t size) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - unsigned int offset; - u8 *byte = buf; - ssize_t ret, bytes_to_recv, len; - const struct hdcp2_dp_msg_data *hdcp2_msg_data; - - hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); - if (!hdcp2_msg_data) - return -EINVAL; - offset = hdcp2_msg_data->offset; - - ret = intel_dp_hdcp2_wait_for_msg(dig_port, hdcp2_msg_data); - if (ret < 0) - return ret; - - if (msg_id == HDCP_2_2_REP_SEND_RECVID_LIST) { - ret = get_receiver_id_list_size(dig_port); - if (ret < 0) - return ret; - - size = ret; - } - bytes_to_recv = size - 1; - - /* DP adaptation msgs has no msg_id */ - byte++; - - while (bytes_to_recv) { - len = bytes_to_recv > DP_AUX_MAX_PAYLOAD_BYTES ? - DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_recv; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, offset, - (void *)byte, len); - if (ret < 0) { - drm_dbg_kms(&i915->drm, "msg_id %d, ret %zd\n", - msg_id, ret); - return ret; - } - - bytes_to_recv -= ret; - byte += ret; - offset += ret; - } - byte = buf; - *byte = msg_id; - - return size; -} - -static -int intel_dp_hdcp2_config_stream_type(struct intel_digital_port *dig_port, - bool is_repeater, u8 content_type) -{ - int ret; - struct hdcp2_dp_errata_stream_type stream_type_msg; - - if (is_repeater) - return 0; - - /* - * Errata for DP: As Stream type is used for encryption, Receiver - * should be communicated with stream type for the decryption of the - * content. - * Repeater will be communicated with stream type as a part of it's - * auth later in time. - */ - stream_type_msg.msg_id = HDCP_2_2_ERRATA_DP_STREAM_TYPE; - stream_type_msg.stream_type = content_type; - - ret = intel_dp_hdcp2_write_msg(dig_port, &stream_type_msg, - sizeof(stream_type_msg)); - - return ret < 0 ? ret : 0; - -} - -static -int intel_dp_hdcp2_check_link(struct intel_digital_port *dig_port) -{ - u8 rx_status; - int ret; - - ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); - if (ret) - return ret; - - if (HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(rx_status)) - ret = HDCP_REAUTH_REQUEST; - else if (HDCP_2_2_DP_RXSTATUS_LINK_FAILED(rx_status)) - ret = HDCP_LINK_INTEGRITY_FAILURE; - else if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) - ret = HDCP_TOPOLOGY_CHANGE; - - return ret; -} - -static -int intel_dp_hdcp2_capable(struct intel_digital_port *dig_port, - bool *capable) -{ - u8 rx_caps[3]; - int ret; - - *capable = false; - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RX_CAPS_OFFSET, - rx_caps, HDCP_2_2_RXCAPS_LEN); - if (ret != HDCP_2_2_RXCAPS_LEN) - return ret >= 0 ? 
-EIO : ret; - - if (rx_caps[0] == HDCP_2_2_RX_CAPS_VERSION_VAL && - HDCP_2_2_DP_HDCP_CAPABLE(rx_caps[2])) - *capable = true; - - return 0; -} - -static const struct intel_hdcp_shim intel_dp_hdcp_shim = { - .write_an_aksv = intel_dp_hdcp_write_an_aksv, - .read_bksv = intel_dp_hdcp_read_bksv, - .read_bstatus = intel_dp_hdcp_read_bstatus, - .repeater_present = intel_dp_hdcp_repeater_present, - .read_ri_prime = intel_dp_hdcp_read_ri_prime, - .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, - .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, - .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, - .toggle_signalling = intel_dp_hdcp_toggle_signalling, - .check_link = intel_dp_hdcp_check_link, - .hdcp_capable = intel_dp_hdcp_capable, - .write_2_2_msg = intel_dp_hdcp2_write_msg, - .read_2_2_msg = intel_dp_hdcp2_read_msg, - .config_stream_type = intel_dp_hdcp2_config_stream_type, - .check_2_2_link = intel_dp_hdcp2_check_link, - .hdcp_2_2_capable = intel_dp_hdcp2_capable, - .protocol = HDCP_PROTOCOL_DP, -}; - static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -7312,6 +6867,9 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect struct drm_i915_private *dev_priv = to_i915(connector->dev); enum port port = dp_to_dig_port(intel_dp)->base.port; + if (!intel_dp_is_edp(intel_dp)) + drm_connector_attach_dp_subconnector_property(connector); + if (!IS_G4X(dev_priv) && port != PORT_A) intel_attach_force_audio_property(connector); @@ -7710,6 +7268,15 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, refresh_rate); } +static void +intel_edp_drrs_enable_locked(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + dev_priv->drrs.busy_frontbuffer_bits = 0; + dev_priv->drrs.dp = intel_dp; +} + /** * intel_edp_drrs_enable - init drrs struct if supported * @intel_dp: DP struct @@ -7722,31 +7289,40 @@ void intel_edp_drrs_enable(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!crtc_state->has_drrs) { - drm_dbg_kms(&dev_priv->drm, "Panel doesn't support DRRS\n"); + if (!crtc_state->has_drrs) return; - } - if (dev_priv->psr.enabled) { - drm_dbg_kms(&dev_priv->drm, - "PSR enabled. 
Not enabling DRRS.\n"); - return; - } + drm_dbg_kms(&dev_priv->drm, "Enabling DRRS\n"); mutex_lock(&dev_priv->drrs.mutex); + if (dev_priv->drrs.dp) { - drm_dbg_kms(&dev_priv->drm, "DRRS already enabled\n"); + drm_warn(&dev_priv->drm, "DRRS already enabled\n"); goto unlock; } - dev_priv->drrs.busy_frontbuffer_bits = 0; - - dev_priv->drrs.dp = intel_dp; + intel_edp_drrs_enable_locked(intel_dp); unlock: mutex_unlock(&dev_priv->drrs.mutex); } +static void +intel_edp_drrs_disable_locked(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) { + int refresh; + + refresh = drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode); + intel_dp_set_drrs_state(dev_priv, crtc_state, refresh); + } + + dev_priv->drrs.dp = NULL; +} + /** * intel_edp_drrs_disable - Disable DRRS * @intel_dp: DP struct @@ -7767,16 +7343,45 @@ void intel_edp_drrs_disable(struct intel_dp *intel_dp, return; } - if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) - intel_dp_set_drrs_state(dev_priv, old_crtc_state, - drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode)); - - dev_priv->drrs.dp = NULL; + intel_edp_drrs_disable_locked(intel_dp, old_crtc_state); mutex_unlock(&dev_priv->drrs.mutex); cancel_delayed_work_sync(&dev_priv->drrs.work); } +/** + * intel_edp_drrs_update - Update DRRS state + * @intel_dp: Intel DP + * @crtc_state: new CRTC state + * + * This function will update DRRS states, disabling or enabling DRRS when + * executing fastsets. For full modeset, intel_edp_drrs_disable() and + * intel_edp_drrs_enable() should be called instead. + */ +void +intel_edp_drrs_update(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + if (dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT) + return; + + mutex_lock(&dev_priv->drrs.mutex); + + /* New state matches current one? 
*/ + if (crtc_state->has_drrs == !!dev_priv->drrs.dp) + goto unlock; + + if (crtc_state->has_drrs) + intel_edp_drrs_enable_locked(intel_dp); + else + intel_edp_drrs_disable_locked(intel_dp, crtc_state); + +unlock: + mutex_unlock(&dev_priv->drrs.mutex); +} + static void intel_edp_drrs_downclock_work(struct work_struct *work) { struct drm_i915_private *dev_priv = @@ -8208,10 +7813,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; - if (INTEL_GEN(dev_priv) >= 11) - connector->ycbcr_420_allowed = true; - - intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_connector->polled = DRM_CONNECTOR_POLL_HPD; intel_dp_aux_init(intel_dp); @@ -8236,7 +7837,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_dp_add_properties(intel_dp, connector); if (is_hdcp_supported(dev_priv, port) && !intel_dp_is_edp(intel_dp)) { - int ret = intel_hdcp_init(intel_connector, &intel_dp_hdcp_shim); + int ret = intel_dp_init_hdcp(dig_port, intel_connector); if (ret) drm_dbg_kms(&dev_priv->drm, "HDCP init failed, skipping.\n"); @@ -8280,6 +7881,8 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder = &dig_port->base; encoder = &intel_encoder->base; + mutex_init(&dig_port->hdcp_mutex); + if (drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS, "DP %c", port_name(port))) @@ -8354,6 +7957,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, } intel_encoder->cloneable = 0; intel_encoder->port = port; + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); dig_port->hpd_pulse = intel_dp_hpd_pulse; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index b901ab850cbd..08a1c0aa8b94 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -17,6 +17,7 @@ struct drm_encoder; struct drm_i915_private; struct drm_modeset_acquire_ctx; struct drm_dp_vsc_sdp; +struct intel_atomic_state; struct intel_connector; struct intel_crtc_state; struct intel_digital_port; @@ -50,6 +51,7 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp); void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool enable); @@ -81,6 +83,8 @@ void intel_edp_drrs_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_edp_drrs_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); +void intel_edp_drrs_update(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits); void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, @@ -99,7 +103,6 @@ bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); @@ -128,4 +131,12 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) u32 
intel_dp_mode_to_fec_clock(u32 mode_clock); +void intel_ddi_update_pipe(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + +int intel_dp_init_hdcp(struct intel_digital_port *dig_port, + struct intel_connector *intel_connector); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c new file mode 100644 index 000000000000..03424d20e9f7 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -0,0 +1,703 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020 Google, Inc. + * + * Authors: + * Sean Paul <seanpaul@chromium.org> + */ + +#include <drm/drm_dp_helper.h> +#include <drm/drm_dp_mst_helper.h> +#include <drm/drm_hdcp.h> +#include <drm/drm_print.h> + +#include "intel_display_types.h" +#include "intel_ddi.h" +#include "intel_dp.h" +#include "intel_hdcp.h" + +static void intel_dp_hdcp_wait_for_cp_irq(struct intel_hdcp *hdcp, int timeout) +{ + long ret; + +#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) + ret = wait_event_interruptible_timeout(hdcp->cp_irq_queue, C, + msecs_to_jiffies(timeout)); + + if (!ret) + DRM_DEBUG_KMS("Timedout at waiting for CP_IRQ\n"); +} + +static +int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *dig_port, + u8 *an) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + u8 aksv[DRM_HDCP_KSV_LEN] = {}; + ssize_t dpcd_ret; + + /* Output An first, that's easy */ + dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AN, + an, DRM_HDCP_AN_LEN); + if (dpcd_ret != DRM_HDCP_AN_LEN) { + drm_dbg_kms(&i915->drm, + "Failed to write An over DP/AUX (%zd)\n", + dpcd_ret); + return dpcd_ret >= 0 ? -EIO : dpcd_ret; + } + + /* + * Since Aksv is Oh-So-Secret, we can't access it in software. So we + * send an empty buffer of the correct length through the DP helpers. On + * the other side, in the transfer hook, we'll generate a flag based on + * the destination address which will tickle the hardware to output the + * Aksv on our behalf after the header is sent. + */ + dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AKSV, + aksv, DRM_HDCP_KSV_LEN); + if (dpcd_ret != DRM_HDCP_KSV_LEN) { + drm_dbg_kms(&i915->drm, + "Failed to write Aksv over DP/AUX (%zd)\n", + dpcd_ret); + return dpcd_ret >= 0 ? -EIO : dpcd_ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bksv(struct intel_digital_port *dig_port, + u8 *bksv) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret != DRM_HDCP_KSV_LEN) { + drm_dbg_kms(&i915->drm, + "Read Bksv from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *dig_port, + u8 *bstatus) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + /* + * For some reason the HDMI and DP HDCP specs call this register + * definition by different names. In the HDMI spec, it's called BSTATUS, + * but in DP it's called BINFO. + */ + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BINFO, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret != DRM_HDCP_BSTATUS_LEN) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? 
-EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_bcaps(struct intel_digital_port *dig_port, + u8 *bcaps) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BCAPS, + bcaps, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bcaps from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + + return 0; +} + +static +int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, + bool *repeater_present) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); + if (ret) + return ret; + + *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *dig_port, + u8 *ri_prime) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret != DRM_HDCP_RI_LEN) { + drm_dbg_kms(&i915->drm, "Read Ri' from DP/AUX failed (%zd)\n", + ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *dig_port, + bool *ksv_ready) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + u8 bstatus; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + *ksv_ready = bstatus & DP_BSTATUS_READY; + return 0; +} + +static +int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *dig_port, + int num_downstream, u8 *ksv_fifo) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + int i; + + /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ + for (i = 0; i < num_downstream; i += 3) { + size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_AUX_HDCP_KSV_FIFO, + ksv_fifo + i * DRM_HDCP_KSV_LEN, + len); + if (ret != len) { + drm_dbg_kms(&i915->drm, + "Read ksv[%d] from DP/AUX failed (%zd)\n", + i, ret); + return ret >= 0 ? -EIO : ret; + } + } + return 0; +} + +static +int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, + int i, u32 *part) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) + return -EINVAL; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_AUX_HDCP_V_PRIME(i), part, + DRM_HDCP_V_PRIME_PART_LEN); + if (ret != DRM_HDCP_V_PRIME_PART_LEN) { + drm_dbg_kms(&i915->drm, + "Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); + return ret >= 0 ? 
-EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, + bool enable) +{ + /* Not used for single stream DisplayPort setups */ + return 0; +} + +static +bool intel_dp_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + u8 bstatus; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return false; + } + + return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); +} + +static +int intel_dp_hdcp_capable(struct intel_digital_port *dig_port, + bool *hdcp_capable) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); + if (ret) + return ret; + + *hdcp_capable = bcaps & DP_BCAPS_HDCP_CAPABLE; + return 0; +} + +struct hdcp2_dp_errata_stream_type { + u8 msg_id; + u8 stream_type; +} __packed; + +struct hdcp2_dp_msg_data { + u8 msg_id; + u32 offset; + bool msg_detectable; + u32 timeout; + u32 timeout2; /* Added for non_paired situation */ +}; + +static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { + { HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, + false, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, + { HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, + true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, + HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, + DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, + HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, + { HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, + false, HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0 }, + { HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_SEND_RECVID_LIST, + DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, + HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, + { HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_MANAGE, + DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, + false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, +/* local define to shovel this through the write_2_2 interface */ +#define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 + { HDCP_2_2_ERRATA_DP_STREAM_TYPE, + DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, + 0, 0 }, +}; + +static int +intel_dp_hdcp2_read_rx_status(struct intel_digital_port *dig_port, + u8 *rx_status) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RXSTATUS_OFFSET, rx_status, + HDCP_2_2_DP_RXSTATUS_LEN); + if (ret != HDCP_2_2_DP_RXSTATUS_LEN) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? 
-EIO : ret; + } + + return 0; +} + +static +int hdcp2_detect_msg_availability(struct intel_digital_port *dig_port, + u8 msg_id, bool *msg_ready) +{ + u8 rx_status; + int ret; + + *msg_ready = false; + ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); + if (ret < 0) + return ret; + + switch (msg_id) { + case HDCP_2_2_AKE_SEND_HPRIME: + if (HDCP_2_2_DP_RXSTATUS_H_PRIME(rx_status)) + *msg_ready = true; + break; + case HDCP_2_2_AKE_SEND_PAIRING_INFO: + if (HDCP_2_2_DP_RXSTATUS_PAIRING(rx_status)) + *msg_ready = true; + break; + case HDCP_2_2_REP_SEND_RECVID_LIST: + if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) + *msg_ready = true; + break; + default: + DRM_ERROR("Unidentified msg_id: %d\n", msg_id); + return -EINVAL; + } + + return 0; +} + +static ssize_t +intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, + const struct hdcp2_dp_msg_data *hdcp2_msg_data) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; + u8 msg_id = hdcp2_msg_data->msg_id; + int ret, timeout; + bool msg_ready = false; + + if (msg_id == HDCP_2_2_AKE_SEND_HPRIME && !hdcp->is_paired) + timeout = hdcp2_msg_data->timeout2; + else + timeout = hdcp2_msg_data->timeout; + + /* + * There is no way to detect the CERT, LPRIME and STREAM_READY + * availability. So Wait for timeout and read the msg. + */ + if (!hdcp2_msg_data->msg_detectable) { + mdelay(timeout); + ret = 0; + } else { + /* + * As we want to check the msg availability at timeout, Ignoring + * the timeout at wait for CP_IRQ. + */ + intel_dp_hdcp_wait_for_cp_irq(hdcp, timeout); + ret = hdcp2_detect_msg_availability(dig_port, + msg_id, &msg_ready); + if (!msg_ready) + ret = -ETIMEDOUT; + } + + if (ret) + drm_dbg_kms(&i915->drm, + "msg_id %d, ret %d, timeout(mSec): %d\n", + hdcp2_msg_data->msg_id, ret, timeout); + + return ret; +} + +static const struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hdcp2_dp_msg_data); i++) + if (hdcp2_dp_msg_data[i].msg_id == msg_id) + return &hdcp2_dp_msg_data[i]; + + return NULL; +} + +static +int intel_dp_hdcp2_write_msg(struct intel_digital_port *dig_port, + void *buf, size_t size) +{ + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; + unsigned int offset; + u8 *byte = buf; + ssize_t ret, bytes_to_write, len; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; + + hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); + if (!hdcp2_msg_data) + return -EINVAL; + + offset = hdcp2_msg_data->offset; + + /* No msg_id in DP HDCP2.2 msgs */ + bytes_to_write = size - 1; + byte++; + + hdcp->cp_irq_count_cached = atomic_read(&hdcp->cp_irq_count); + + while (bytes_to_write) { + len = bytes_to_write > DP_AUX_MAX_PAYLOAD_BYTES ? + DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_write; + + ret = drm_dp_dpcd_write(&dig_port->dp.aux, + offset, (void *)byte, len); + if (ret < 0) + return ret; + + bytes_to_write -= ret; + byte += ret; + offset += ret; + } + + return size; +} + +static +ssize_t get_receiver_id_list_size(struct intel_digital_port *dig_port) +{ + u8 rx_info[HDCP_2_2_RXINFO_LEN]; + u32 dev_cnt; + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RXINFO_OFFSET, + (void *)rx_info, HDCP_2_2_RXINFO_LEN); + if (ret != HDCP_2_2_RXINFO_LEN) + return ret >= 0 ? 
-EIO : ret; + + dev_cnt = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4 | + HDCP_2_2_DEV_COUNT_LO(rx_info[1])); + + if (dev_cnt > HDCP_2_2_MAX_DEVICE_COUNT) + dev_cnt = HDCP_2_2_MAX_DEVICE_COUNT; + + ret = sizeof(struct hdcp2_rep_send_receiverid_list) - + HDCP_2_2_RECEIVER_IDS_MAX_LEN + + (dev_cnt * HDCP_2_2_RECEIVER_ID_LEN); + + return ret; +} + +static +int intel_dp_hdcp2_read_msg(struct intel_digital_port *dig_port, + u8 msg_id, void *buf, size_t size) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + unsigned int offset; + u8 *byte = buf; + ssize_t ret, bytes_to_recv, len; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; + + hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); + if (!hdcp2_msg_data) + return -EINVAL; + offset = hdcp2_msg_data->offset; + + ret = intel_dp_hdcp2_wait_for_msg(dig_port, hdcp2_msg_data); + if (ret < 0) + return ret; + + if (msg_id == HDCP_2_2_REP_SEND_RECVID_LIST) { + ret = get_receiver_id_list_size(dig_port); + if (ret < 0) + return ret; + + size = ret; + } + bytes_to_recv = size - 1; + + /* DP adaptation msgs has no msg_id */ + byte++; + + while (bytes_to_recv) { + len = bytes_to_recv > DP_AUX_MAX_PAYLOAD_BYTES ? + DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_recv; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, offset, + (void *)byte, len); + if (ret < 0) { + drm_dbg_kms(&i915->drm, "msg_id %d, ret %zd\n", + msg_id, ret); + return ret; + } + + bytes_to_recv -= ret; + byte += ret; + offset += ret; + } + byte = buf; + *byte = msg_id; + + return size; +} + +static +int intel_dp_hdcp2_config_stream_type(struct intel_digital_port *dig_port, + bool is_repeater, u8 content_type) +{ + int ret; + struct hdcp2_dp_errata_stream_type stream_type_msg; + + if (is_repeater) + return 0; + + /* + * Errata for DP: As Stream type is used for encryption, Receiver + * should be communicated with stream type for the decryption of the + * content. + * Repeater will be communicated with stream type as a part of it's + * auth later in time. + */ + stream_type_msg.msg_id = HDCP_2_2_ERRATA_DP_STREAM_TYPE; + stream_type_msg.stream_type = content_type; + + ret = intel_dp_hdcp2_write_msg(dig_port, &stream_type_msg, + sizeof(stream_type_msg)); + + return ret < 0 ? ret : 0; + +} + +static +int intel_dp_hdcp2_check_link(struct intel_digital_port *dig_port) +{ + u8 rx_status; + int ret; + + ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); + if (ret) + return ret; + + if (HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(rx_status)) + ret = HDCP_REAUTH_REQUEST; + else if (HDCP_2_2_DP_RXSTATUS_LINK_FAILED(rx_status)) + ret = HDCP_LINK_INTEGRITY_FAILURE; + else if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) + ret = HDCP_TOPOLOGY_CHANGE; + + return ret; +} + +static +int intel_dp_hdcp2_capable(struct intel_digital_port *dig_port, + bool *capable) +{ + u8 rx_caps[3]; + int ret; + + *capable = false; + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RX_CAPS_OFFSET, + rx_caps, HDCP_2_2_RXCAPS_LEN); + if (ret != HDCP_2_2_RXCAPS_LEN) + return ret >= 0 ? 
-EIO : ret; + + if (rx_caps[0] == HDCP_2_2_RX_CAPS_VERSION_VAL && + HDCP_2_2_DP_HDCP_CAPABLE(rx_caps[2])) + *capable = true; + + return 0; +} + +static const struct intel_hdcp_shim intel_dp_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_hdcp_toggle_signalling, + .check_link = intel_dp_hdcp_check_link, + .hdcp_capable = intel_dp_hdcp_capable, + .write_2_2_msg = intel_dp_hdcp2_write_msg, + .read_2_2_msg = intel_dp_hdcp2_read_msg, + .config_stream_type = intel_dp_hdcp2_config_stream_type, + .check_2_2_link = intel_dp_hdcp2_check_link, + .hdcp_2_2_capable = intel_dp_hdcp2_capable, + .protocol = HDCP_PROTOCOL_DP, +}; + +static int +intel_dp_mst_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, + bool enable) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + int ret; + + if (!enable) + usleep_range(6, 60); /* Bspec says >= 6us */ + + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, + cpu_transcoder, enable); + if (ret) + drm_dbg_kms(&i915->drm, "%s HDCP signalling failed (%d)\n", + enable ? "Enable" : "Disable", ret); + return ret; +} + +static +bool intel_dp_mst_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_dp *intel_dp = &dig_port->dp; + struct drm_dp_query_stream_enc_status_ack_reply reply; + int ret; + + if (!intel_dp_hdcp_check_link(dig_port, connector)) + return false; + + ret = drm_dp_send_query_stream_enc_status(&intel_dp->mst_mgr, + connector->port, &reply); + if (ret) { + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] failed QSES ret=%d\n", + connector->base.base.id, connector->base.name, ret); + return false; + } + + return reply.auth_completed && reply.encryption_enabled; +} + +static const struct intel_hdcp_shim intel_dp_mst_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_mst_hdcp_toggle_signalling, + .check_link = intel_dp_mst_hdcp_check_link, + .hdcp_capable = intel_dp_hdcp_capable, + + .protocol = HDCP_PROTOCOL_DP, +}; + +int intel_dp_init_hdcp(struct intel_digital_port *dig_port, + struct intel_connector *intel_connector) +{ + struct drm_device *dev = intel_connector->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_encoder *intel_encoder = &dig_port->base; + enum port port = intel_encoder->port; + struct intel_dp *intel_dp = &dig_port->dp; + + if (!is_hdcp_supported(dev_priv, port)) + return 0; + + if (intel_connector->mst_port) + return intel_hdcp_init(intel_connector, port, + &intel_dp_mst_hdcp_shim); + else if (!intel_dp_is_edp(intel_dp)) + return intel_hdcp_init(intel_connector, port, + &intel_dp_hdcp_shim); + + return 0; +} diff --git 
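The HDCP 2.2 write and read paths above move message bodies over DP AUX in payload-sized chunks, advancing the buffer pointer and the DPCD offset by whatever each transfer returns. Below is a minimal, illustrative sketch of that chunking loop only, written as self-contained userspace C: the transfer routine is a stub standing in for drm_dp_dpcd_write(), and the 16-byte per-transaction limit and the 0x69000 offset are assumptions made here to mirror DP_AUX_MAX_PAYLOAD_BYTES and an HDCP 2.2 DPCD register, not values taken from this patch.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>	/* ssize_t */

#define AUX_MAX_PAYLOAD 16	/* assumed per-transaction limit */

/* Stub transfer: stands in for drm_dp_dpcd_write(); reports bytes moved. */
static ssize_t aux_write(unsigned int offset, const uint8_t *buf, size_t len)
{
	(void)buf;
	printf("aux write: offset=0x%05x len=%zu\n", offset, len);
	return (ssize_t)len;
}

/*
 * Walk a message body in AUX-sized chunks, bumping the source pointer and
 * the DPCD offset by however many bytes each transfer reported. Returns
 * the full size on success or the first negative error.
 */
static ssize_t write_msg_chunked(unsigned int offset, const uint8_t *msg,
				 size_t size)
{
	size_t remaining = size;

	while (remaining) {
		size_t len = remaining > AUX_MAX_PAYLOAD ?
			     AUX_MAX_PAYLOAD : remaining;
		ssize_t ret = aux_write(offset, msg, len);

		if (ret < 0)
			return ret;

		remaining -= (size_t)ret;
		msg += ret;
		offset += (unsigned int)ret;
	}

	return (ssize_t)size;
}

int main(void)
{
	uint8_t body[100];	/* arbitrary message body for illustration */

	memset(body, 0xab, sizeof(body));
	return write_msg_chunked(0x69000, body, sizeof(body)) < 0;
}

The shim itself performs the same walk, but caches cp_irq_count before writing so that the subsequent CP_IRQ wait has a baseline to compare against.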
a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index a23ed7290843..f2c8b56be9ea 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -410,10 +410,17 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) intel_connector->base.base.id, intel_connector->base.name, intel_dp->link_rate, intel_dp->lane_count); - if (!intel_dp_get_link_train_fallback_values(intel_dp, - intel_dp->link_rate, - intel_dp->lane_count)) - /* Schedule a Hotplug Uevent to userspace to start modeset */ - schedule_work(&intel_connector->modeset_retry_work); - return; + + if (intel_dp->hobl_active) { + drm_dbg_kms(&dp_to_i915(intel_dp)->drm, + "Link Training failed with HOBL active, not enabling it from now on"); + intel_dp->hobl_failed = true; + } else if (intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) { + return; + } + + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a2d91a499700..64d885539e94 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -37,6 +37,7 @@ #include "intel_dp.h" #include "intel_dp_mst.h" #include "intel_dpio_phy.h" +#include "intel_hdcp.h" static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, @@ -352,6 +353,8 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, drm_dbg_kms(&i915->drm, "active links %d\n", intel_dp->active_mst_links); + intel_hdcp_disable(intel_mst->connector); + drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); @@ -556,6 +559,13 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); + + /* Enable hdcp if it's desired */ + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED) + intel_hdcp_enable(to_intel_connector(conn_state->connector), + pipe_config->cpu_transcoder, + (u8)conn_state->hdcp_content_type); } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -709,9 +719,13 @@ static int intel_dp_mst_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (drm_connector_is_unregistered(connector)) return connector_status_disconnected; @@ -799,6 +813,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); + + /* TODO: Figure out how to make HDCP work on GEN12+ */ + if (INTEL_GEN(dev_priv) < 12) { + ret = intel_dp_init_hdcp(dig_port, intel_connector); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + /* * Reuse the prop from the SST connector because we're * not allowed to create new props after device registration. 
@@ -865,6 +887,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe intel_encoder->compute_config_late = intel_dp_mst_compute_config_late; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; + intel_encoder->update_pipe = intel_ddi_update_pipe; intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index afa7a378b31d..e08684e34078 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -147,6 +147,18 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, pll->info->name, onoff(state), onoff(cur_state)); } +static i915_reg_t +intel_combo_pll_enable_reg(struct drm_i915_private *i915, + struct intel_shared_dpll *pll) +{ + + if (IS_ELKHARTLAKE(i915) && (pll->info->id == DPLL_ID_EHL_DPLL4)) + return MG_PLL_ENABLE(0); + + return CNL_DPLL_ENABLE(pll->info->id); + + +} /** * intel_prepare_shared_dpll - call a dpll's prepare hook * @crtc_state: CRTC, and its state, which has a shared dpll @@ -3475,6 +3487,14 @@ static void icl_update_active_dpll(struct intel_atomic_state *state, icl_set_active_port_dpll(crtc_state, port_dpll_id); } +static u32 intel_get_hti_plls(struct drm_i915_private *i915) +{ + if (!(i915->hti_state & HDPORT_ENABLED)) + return 0; + + return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->hti_state); +} + static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_encoder *encoder) @@ -3504,13 +3524,22 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, icl_calc_dpll_state(dev_priv, &pll_params, &port_dpll->hw_state); - if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) + if (IS_ROCKETLAKE(dev_priv)) { dpll_mask = BIT(DPLL_ID_EHL_DPLL4) | BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); - else + } else if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) { + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + } else { dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); + } + + /* Eliminate DPLLs from consideration if reserved by HTI */ + dpll_mask &= ~intel_get_hti_plls(dev_priv); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, @@ -3791,7 +3820,12 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, if (!(val & PLL_ENABLE)) goto out; - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + hw_state->cfgcr0 = intel_de_read(dev_priv, + RKL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = intel_de_read(dev_priv, + RKL_DPLL_CFGCR1(id)); + } else if (INTEL_GEN(dev_priv) >= 12) { hw_state->cfgcr0 = intel_de_read(dev_priv, TGL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = intel_de_read(dev_priv, @@ -3820,12 +3854,7 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); - - if (IS_ELKHARTLAKE(dev_priv) && - pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); - } + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } @@ -3844,7 +3873,10 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv, const enum intel_dpll_id id = pll->info->id; 
i915_reg_t cfgcr0_reg, cfgcr1_reg; - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + cfgcr0_reg = RKL_DPLL_CFGCR0(id); + cfgcr1_reg = RKL_DPLL_CFGCR1(id); + } else if (INTEL_GEN(dev_priv) >= 12) { cfgcr0_reg = TGL_DPLL_CFGCR0(id); cfgcr1_reg = TGL_DPLL_CFGCR1(id); } else { @@ -4020,11 +4052,10 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, static void combo_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); if (IS_ELKHARTLAKE(dev_priv) && pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); /* * We need to disable DC states when this DPLL is enabled. @@ -4132,19 +4163,14 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, static void combo_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); - if (IS_ELKHARTLAKE(dev_priv) && - pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); - icl_pll_disable(dev_priv, pll, enable_reg); + icl_pll_disable(dev_priv, pll, enable_reg); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) intel_display_power_put(dev_priv, POWER_DOMAIN_DPLL_DC_OFF, pll->wakeref); - return; - } - - icl_pll_disable(dev_priv, pll, enable_reg); } static void tbt_pll_disable(struct drm_i915_private *dev_priv, @@ -4276,6 +4302,21 @@ static const struct intel_dpll_mgr tgl_pll_mgr = { .dump_hw_state = icl_dump_hw_state, }; +static const struct dpll_info rkl_plls[] = { + { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr rkl_pll_mgr = { + .dpll_info = rkl_plls, + .get_dplls = icl_get_dplls, + .put_dplls = icl_put_dplls, + .update_ref_clks = icl_update_dpll_ref_clks, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -4289,7 +4330,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (INTEL_GEN(dev_priv) >= 12) + if (IS_ROCKETLAKE(dev_priv)) + dpll_mgr = &rkl_pll_mgr; + else if (INTEL_GEN(dev_priv) >= 12) dpll_mgr = &tgl_pll_mgr; else if (IS_ELKHARTLAKE(dev_priv)) dpll_mgr = &ehl_pll_mgr; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 307ed8ae9a19..237dbb1ba0ee 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -313,9 +313,15 @@ static void intel_dvo_pre_enable(struct intel_atomic_state *state, static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector, bool force) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 24c3a0f212c6..135f5e8a4d70 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -424,6 +424,14 @@ static 
void intel_fbc_deactivate(struct drm_i915_private *dev_priv, fbc->no_fbc_reason = reason; } +static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 5 || IS_G4X(i915)) + return BIT_ULL(28); + else + return BIT_ULL(32); +} + static int find_compression_threshold(struct drm_i915_private *dev_priv, struct drm_mm_node *node, unsigned int size, @@ -442,6 +450,8 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, else end = U64_MAX; + end = min(end, intel_fbc_cfb_base_max(dev_priv)); + /* HACK: This code depends on what we will do in *_enable_fbc. If that * code changes, this code needs to change as well. * @@ -1416,6 +1426,13 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (!HAS_FBC(dev_priv)) return 0; + /* + * Fbc is causing random underruns in CI execution on TGL platforms. + * Disabling the same while the problem is being debugged and analyzed. + */ + if (IS_TIGERLAKE(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index bd39eb6a21b8..842c04e63214 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -451,8 +451,7 @@ int intel_fbdev_init(struct drm_device *dev) struct intel_fbdev *ifbdev; int ret; - if (drm_WARN_ON(dev, !HAS_DISPLAY(dev_priv) || - !INTEL_DISPLAY_ENABLED(dev_priv))) + if (drm_WARN_ON(dev, !HAS_DISPLAY(dev_priv))) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 2979ed2588eb..d898b370d7a4 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -232,6 +232,8 @@ static void frontbuffer_release(struct kref *ref) RCU_INIT_POINTER(obj->frontbuffer, NULL); spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); + i915_active_fini(&front->write); + i915_gem_object_put(obj); kfree_rcu(front, rcu); } diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index a8d119b6b45c..e6b8d6dfb598 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -834,7 +834,7 @@ int intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return 0; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 1a0d49af2a08..5492076d1ae0 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -148,9 +148,8 @@ static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *dig_port, static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; enum i915_power_well_id id; + intel_wakeref_t wakeref; bool enabled = false; /* @@ -162,17 +161,9 @@ static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) else id = SKL_DISP_PW_1; - mutex_lock(&power_domains->lock); - /* PG1 (power well #1) needs to be enabled */ - for_each_power_well(dev_priv, power_well) { - if (power_well->desc->id == id) { - enabled = power_well->desc->ops->is_enabled(dev_priv, - 
power_well); - break; - } - } - mutex_unlock(&power_domains->lock); + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) + enabled = intel_display_power_well_is_enabled(dev_priv, id); /* * Another req for hdcp key loadability is enabled state of pll for @@ -713,7 +704,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) intel_de_write(dev_priv, HDCP_REP_CTL, intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port)); - ret = shim->toggle_signalling(dig_port, true); + ret = shim->toggle_signalling(dig_port, cpu_transcoder, true); if (ret) return ret; @@ -801,6 +792,19 @@ static int _intel_hdcp_disable(struct intel_connector *connector) drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP is being disabled...\n", connector->base.name, connector->base.base.id); + /* + * If there are other connectors on this port using HDCP, don't disable + * it. Instead, toggle the HDCP signalling off on that particular + * connector/pipe and exit. + */ + if (dig_port->num_hdcp_streams > 0) { + ret = hdcp->shim->toggle_signalling(dig_port, + cpu_transcoder, false); + if (ret) + DRM_ERROR("Failed to disable HDCP signalling\n"); + return ret; + } + hdcp->hdcp_encrypted = false; intel_de_write(dev_priv, HDCP_CONF(dev_priv, cpu_transcoder, port), 0); if (intel_de_wait_for_clear(dev_priv, @@ -816,7 +820,7 @@ static int _intel_hdcp_disable(struct intel_connector *connector) intel_de_write(dev_priv, HDCP_REP_CTL, intel_de_read(dev_priv, HDCP_REP_CTL) & ~repeater_ctl); - ret = hdcp->shim->toggle_signalling(dig_port, false); + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, false); if (ret) { drm_err(&dev_priv->drm, "Failed to disable HDCP signalling\n"); return ret; @@ -876,6 +880,34 @@ static struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp) return container_of(hdcp, struct intel_connector, hdcp); } +static void intel_hdcp_update_value(struct intel_connector *connector, + u64 value, bool update_property) +{ + struct drm_device *dev = connector->base.dev; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_hdcp *hdcp = &connector->hdcp; + + drm_WARN_ON(connector->base.dev, !mutex_is_locked(&hdcp->mutex)); + + if (hdcp->value == value) + return; + + drm_WARN_ON(dev, !mutex_is_locked(&dig_port->hdcp_mutex)); + + if (hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + if (!drm_WARN_ON(dev, dig_port->num_hdcp_streams == 0)) + dig_port->num_hdcp_streams--; + } else if (value == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + dig_port->num_hdcp_streams++; + } + + hdcp->value = value; + if (update_property) { + drm_connector_get(&connector->base); + schedule_work(&hdcp->prop_work); + } +} + /* Implements Part 3 of the HDCP authorization procedure */ static int intel_hdcp_check_link(struct intel_connector *connector) { @@ -887,6 +919,8 @@ static int intel_hdcp_check_link(struct intel_connector *connector) int ret = 0; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* Check_link valid only when HDCP1.4 is enabled */ @@ -903,15 +937,16 @@ static int intel_hdcp_check_link(struct intel_connector *connector) connector->base.name, connector->base.base.id, intel_de_read(dev_priv, HDCP_STATUS(dev_priv, cpu_transcoder, port))); ret = -ENXIO; - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } - if (hdcp->shim->check_link(dig_port)) { + if 
(hdcp->shim->check_link(dig_port, connector)) { if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, true); } goto out; } @@ -923,20 +958,23 @@ static int intel_hdcp_check_link(struct intel_connector *connector) ret = _intel_hdcp_disable(connector); if (ret) { drm_err(&dev_priv->drm, "Failed to disable hdcp (%d)\n", ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } ret = _intel_hdcp_enable(connector); if (ret) { drm_err(&dev_priv->drm, "Failed to enable hdcp (%d)\n", ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } out: + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); return ret; } @@ -962,6 +1000,8 @@ static void intel_hdcp_prop_work(struct work_struct *work) mutex_unlock(&hdcp->mutex); drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + + drm_connector_put(&connector->base); } bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port) @@ -1600,7 +1640,8 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) intel_de_read(dev_priv, HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & LINK_ENCRYPTION_STATUS); if (hdcp->shim->toggle_signalling) { - ret = hdcp->shim->toggle_signalling(dig_port, true); + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, + true); if (ret) { drm_err(&dev_priv->drm, "Failed to enable HDCP signalling. %d\n", @@ -1650,7 +1691,8 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) drm_dbg_kms(&dev_priv->drm, "Disable Encryption Timedout"); if (hdcp->shim->toggle_signalling) { - ret = hdcp->shim->toggle_signalling(dig_port, false); + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, + false); if (ret) { drm_err(&dev_priv->drm, "Failed to disable HDCP signalling. 
%d\n", @@ -1766,16 +1808,18 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "HDCP2.2 link stopped the encryption, %x\n", intel_de_read(dev_priv, HDCP2_STATUS(dev_priv, cpu_transcoder, port))); ret = -ENXIO; - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } ret = hdcp->shim->check_2_2_link(dig_port); if (ret == HDCP_LINK_PROTECTED) { if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); } goto out; } @@ -1788,8 +1832,9 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "HDCP2.2 Downstream topology change\n"); ret = hdcp2_authenticate_repeater_topology(connector); if (!ret) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); goto out; } drm_dbg_kms(&dev_priv->drm, @@ -1807,8 +1852,8 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) drm_err(&dev_priv->drm, "[%s:%d] Failed to disable hdcp2.2 (%d)\n", connector->base.name, connector->base.base.id, ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, true); goto out; } @@ -1818,8 +1863,9 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "[%s:%d] Failed to enable hdcp2.2 (%d)\n", connector->base.name, connector->base.base.id, ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } @@ -1835,6 +1881,9 @@ static void intel_hdcp_check_work(struct work_struct *work) check_work); struct intel_connector *connector = intel_hdcp_to_connector(hdcp); + if (drm_connector_is_unregistered(&connector->base)) + return; + if (!intel_hdcp2_check_link(connector)) schedule_delayed_work(&hdcp->check_work, DRM_HDCP2_CHECK_PERIOD_MS); @@ -1896,6 +1945,7 @@ static enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) } static int initialize_hdcp_port_data(struct intel_connector *connector, + enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -1903,8 +1953,7 @@ static int initialize_hdcp_port_data(struct intel_connector *connector, struct hdcp_port_data *data = &hdcp->port_data; if (INTEL_GEN(dev_priv) < 12) - data->fw_ddi = - intel_get_mei_fw_ddi_index(intel_attached_encoder(connector)->port); + data->fw_ddi = intel_get_mei_fw_ddi_index(port); else /* * As per ME FW API expectation, for GEN 12+, fw_ddi is filled @@ -1974,14 +2023,14 @@ void intel_hdcp_component_init(struct drm_i915_private *dev_priv) } } -static void intel_hdcp2_init(struct intel_connector *connector, +static void intel_hdcp2_init(struct intel_connector *connector, enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; int ret; - ret = initialize_hdcp_port_data(connector, shim); + ret = initialize_hdcp_port_data(connector, port, shim); if (ret) { drm_dbg_kms(&i915->drm, "Mei hdcp data init failed\n"); return; @@ -1991,6 +2040,7 @@ static void 
intel_hdcp2_init(struct intel_connector *connector, } int intel_hdcp_init(struct intel_connector *connector, + enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -2000,8 +2050,8 @@ int intel_hdcp_init(struct intel_connector *connector, if (!shim) return -EINVAL; - if (is_hdcp2_supported(dev_priv)) - intel_hdcp2_init(connector, shim); + if (is_hdcp2_supported(dev_priv) && !connector->mst_port) + intel_hdcp2_init(connector, port, shim); ret = drm_connector_attach_content_protection_property(&connector->base, @@ -2025,6 +2075,7 @@ int intel_hdcp_enable(struct intel_connector *connector, enum transcoder cpu_transcoder, u8 content_type) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS; int ret = -EINVAL; @@ -2033,14 +2084,14 @@ int intel_hdcp_enable(struct intel_connector *connector, return -ENOENT; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); drm_WARN_ON(&dev_priv->drm, hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); hdcp->content_type = content_type; + hdcp->cpu_transcoder = cpu_transcoder; - if (INTEL_GEN(dev_priv) >= 12) { - hdcp->cpu_transcoder = cpu_transcoder; + if (INTEL_GEN(dev_priv) >= 12) hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); - } /* * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup @@ -2063,16 +2114,19 @@ int intel_hdcp_enable(struct intel_connector *connector, if (!ret) { schedule_delayed_work(&hdcp->check_work, check_link_interval); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); } + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); return ret; } int intel_hdcp_disable(struct intel_connector *connector) { + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; int ret = 0; @@ -2080,15 +2134,20 @@ int intel_hdcp_disable(struct intel_connector *connector) return -ENOENT; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); - if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; - if (hdcp->hdcp2_encrypted) - ret = _intel_hdcp2_disable(connector); - else if (hdcp->hdcp_encrypted) - ret = _intel_hdcp_disable(connector); - } + if (hdcp->value == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) + goto out; + + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_UNDESIRED, false); + if (hdcp->hdcp2_encrypted) + ret = _intel_hdcp2_disable(connector); + else if (hdcp->hdcp_encrypted) + ret = _intel_hdcp_disable(connector); +out: + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); cancel_delayed_work_sync(&hdcp->check_work); return ret; @@ -2102,11 +2161,15 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_hdcp *hdcp = &connector->hdcp; - bool content_protection_type_changed = + bool content_protection_type_changed, desired_and_not_enabled = false; + + if (!connector->hdcp.shim) + return; + + content_protection_type_changed = (conn_state->hdcp_content_type != hdcp->content_type && conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED); - bool 
desired_and_not_enabled = false; /* * During the HDCP encryption session if Type change is requested, @@ -2159,12 +2222,39 @@ void intel_hdcp_component_fini(struct drm_i915_private *dev_priv) void intel_hdcp_cleanup(struct intel_connector *connector) { - if (!connector->hdcp.shim) + struct intel_hdcp *hdcp = &connector->hdcp; + + if (!hdcp->shim) return; - mutex_lock(&connector->hdcp.mutex); - kfree(connector->hdcp.port_data.streams); - mutex_unlock(&connector->hdcp.mutex); + /* + * If the connector is registered, it's possible userspace could kick + * off another HDCP enable, which would re-spawn the workers. + */ + drm_WARN_ON(connector->base.dev, + connector->base.registration_state == DRM_CONNECTOR_REGISTERED); + + /* + * Now that the connector is not registered, check_work won't be run, + * but cancel any outstanding instances of it + */ + cancel_delayed_work_sync(&hdcp->check_work); + + /* + * We don't cancel prop_work in the same way as check_work since it + * requires connection_mutex which could be held while calling this + * function. Instead, we rely on the connector references grabbed before + * scheduling prop_work to ensure the connector is alive when prop_work + * is run. So if we're in the destroy path (which is where this + * function should be called), we're "guaranteed" that prop_work is not + * active (tl;dr This Should Never Happen). + */ + drm_WARN_ON(connector->base.dev, work_pending(&hdcp->prop_work)); + + mutex_lock(&hdcp->mutex); + kfree(hdcp->port_data.streams); + hdcp->shim = NULL; + mutex_unlock(&hdcp->mutex); } void intel_hdcp_atomic_check(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 86bbaec120cc..1bbf5b67ed0a 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -22,7 +22,7 @@ enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); -int intel_hdcp_init(struct intel_connector *connector, +int intel_hdcp_init(struct intel_connector *connector, enum port port, const struct intel_hdcp_shim *hdcp_shim); int intel_hdcp_enable(struct intel_connector *connector, enum transcoder cpu_transcoder, u8 content_type); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index de2ce5632b94..3f2008d845c2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1477,7 +1477,8 @@ int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, return ret; } -static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) +static int kbl_repositioning_enc_en_signal(struct intel_connector *connector, + enum transcoder cpu_transcoder) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); @@ -1494,13 +1495,15 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) usleep_range(25, 50); } - ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, false); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + false); if (ret) { drm_err(&dev_priv->drm, "Disable HDCP signalling failed (%d)\n", ret); return ret; } - ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, true); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + true); if (ret) { drm_err(&dev_priv->drm, 
"Enable HDCP signalling failed (%d)\n", ret); @@ -1512,6 +1515,7 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) static int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, bool enable) { struct intel_hdmi *hdmi = &dig_port->hdmi; @@ -1522,7 +1526,8 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, if (!enable) usleep_range(6, 60); /* Bspec says >= 6us */ - ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, enable); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + enable); if (ret) { drm_err(&dev_priv->drm, "%s HDCP signalling failed (%d)\n", enable ? "Enable" : "Disable", ret); @@ -1534,17 +1539,17 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, * opportunity and enc_en signalling in KABYLAKE. */ if (IS_KABYLAKE(dev_priv) && enable) - return kbl_repositioning_enc_en_signal(connector); + return kbl_repositioning_enc_en_signal(connector, + cpu_transcoder); return 0; } static -bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port) +bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, + struct intel_connector *connector) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_connector *connector = - dig_port->hdmi.attached_connector; enum port port = dig_port->base.port; enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; @@ -1572,13 +1577,14 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port) } static -bool intel_hdmi_hdcp_check_link(struct intel_digital_port *dig_port) +bool intel_hdmi_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); int retry; for (retry = 0; retry < 3; retry++) - if (intel_hdmi_hdcp_check_link_once(dig_port)) + if (intel_hdmi_hdcp_check_link_once(dig_port, connector)) return true; drm_err(&i915->drm, "Link check failed\n"); @@ -2271,35 +2277,18 @@ intel_hdmi_mode_valid(struct drm_connector *connector, return intel_mode_valid_max_plane_size(dev_priv, mode); } -static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, - int bpc) +bool intel_hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, + int bpc, bool has_hdmi_sink, bool ycbcr420_output) { - struct drm_i915_private *dev_priv = - to_i915(crtc_state->uapi.crtc->dev); struct drm_atomic_state *state = crtc_state->uapi.state; struct drm_connector_state *connector_state; struct drm_connector *connector; - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; int i; - if (HAS_GMCH(dev_priv)) - return false; - - if (bpc == 10 && INTEL_GEN(dev_priv) < 11) - return false; - if (crtc_state->pipe_bpp < bpc * 3) return false; - if (!crtc_state->has_hdmi_sink) - return false; - - /* - * HDMI deep color affects the clocks, so it's only possible - * when not cloning with other encoder types. 
- */ - if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI) + if (!has_hdmi_sink) return false; for_each_new_connector_in_state(state, connector, connector_state, i) { @@ -2308,7 +2297,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, if (connector_state->crtc != crtc_state->uapi.crtc) continue; - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) { + if (ycbcr420_output) { const struct drm_hdmi_info *hdmi = &info->hdmi; if (bpc == 12 && !(hdmi->y420_dc_modes & @@ -2327,6 +2316,30 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, } } + return true; +} + +static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, + int bpc) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->uapi.crtc->dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + if (HAS_GMCH(dev_priv)) + return false; + + if (bpc == 10 && INTEL_GEN(dev_priv) < 11) + return false; + + /* + * HDMI deep color affects the clocks, so it's only possible + * when not cloning with other encoder types. + */ + if (crtc_state->output_types != BIT(INTEL_OUTPUT_HDMI)) + return false; + /* Display Wa_1405510057:icl,ehl */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && bpc == 10 && IS_GEN(dev_priv, 11) && @@ -2334,7 +2347,10 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, adjusted_mode->crtc_hblank_start) % 8 == 2) return false; - return true; + return intel_hdmi_deep_color_possible(crtc_state, bpc, + crtc_state->has_hdmi_sink, + crtc_state->output_format == + INTEL_OUTPUT_FORMAT_YCBCR420); } static int @@ -2459,6 +2475,23 @@ bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, } } +static bool intel_hdmi_has_audio(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + + if (!crtc_state->has_hdmi_sink) + return false; + + if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) + return intel_hdmi->has_audio; + else + return intel_conn_state->force_audio == HDMI_AUDIO_ON; +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2468,8 +2501,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; - struct intel_digital_connector_state *intel_conn_state = - to_intel_digital_connector_state(conn_state); int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -2495,13 +2526,8 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; - if (pipe_config->has_hdmi_sink) { - if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) - pipe_config->has_audio = intel_hdmi->has_audio; - else - pipe_config->has_audio = - intel_conn_state->force_audio == HDMI_AUDIO_ON; - } + pipe_config->has_audio = + intel_hdmi_has_audio(encoder, pipe_config, conn_state); ret = intel_hdmi_compute_clock(encoder, pipe_config); if (ret) @@ -2667,6 +2693,9 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) 
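The deep color refactor above splits the sink-independent arithmetic out of hdmi_deep_color_possible(): a given bpc is only viable when the pipe carries at least bpc * 3 bits per pixel and the sink really is an HDMI sink, with the EDID deep color and YCbCr 4:2:0 checks layered on top of that. A small standalone C sketch of just that arithmetic gate (the connector-state walk is deliberately left out) behaves as follows:

#include <stdbool.h>
#include <stdio.h>

/*
 * Sink-independent part of the deep color check: the pipe has to carry at
 * least three channels at the requested depth and the sink must be HDMI.
 * The real helper additionally walks the connector state for EDID deep
 * color and YCbCr 4:2:0 support; that part is omitted here.
 */
static bool deep_color_bpc_possible(int pipe_bpp, int bpc, bool has_hdmi_sink)
{
	if (pipe_bpp < bpc * 3)
		return false;

	if (!has_hdmi_sink)
		return false;

	return true;
}

int main(void)
{
	/* A 36 bpp pipe can feed 12 bpc; a 24 bpp pipe cannot. */
	printf("36 bpp, 12 bpc: %d\n", deep_color_bpc_possible(36, 12, true));
	printf("24 bpp, 12 bpc: %d\n", deep_color_bpc_possible(24, 12, true));
	printf("24 bpp,  8 bpc: %d\n", deep_color_bpc_possible(24, 8, true));
	return 0;
}

The wrapper keeps the platform gates (GMCH, 10 bpc on pre-gen11, the no-cloning rule and the icl/ehl 4:2:0 workaround) and only then defers to the shared helper.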
drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (INTEL_GEN(dev_priv) >= 11 && @@ -3250,7 +3279,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) connector->ycbcr_420_allowed = true; - intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_connector->polled = DRM_CONNECTOR_POLL_HPD; if (HAS_DDI(dev_priv)) @@ -3264,7 +3292,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, intel_hdmi->attached_connector = intel_connector; if (is_hdcp_supported(dev_priv, port)) { - int ret = intel_hdcp_init(intel_connector, + int ret = intel_hdcp_init(intel_connector, port, &intel_hdmi_hdcp_shim); if (ret) drm_dbg_kms(&dev_priv->drm, @@ -3335,6 +3363,8 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder = &dig_port->base; + mutex_init(&dig_port->hdcp_mutex); + drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); @@ -3382,6 +3412,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); /* * BSpec is unclear about HDMI+HDMI cloning on g4x, but it seems * to work on real hardware. And since g4x can send infoframes to diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 5b348dcab77a..15eb0ccde76e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -48,5 +48,7 @@ void intel_read_infoframe(struct intel_encoder *encoder, union hdmi_infoframe *frame); bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +bool intel_hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, int bpc, + bool has_hdmi_sink, bool ycbcr420_output); #endif /* __INTEL_HDMI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 3f1d7b804a66..5c58c1ed6493 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -81,33 +81,12 @@ * * It is only valid and used by digital port encoder. * - * Return pin that is associatade with @port and HDP_NONE if no pin is - * hard associated with that @port. + * Return pin that is associatade with @port. */ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - enum phy phy = intel_port_to_phy(dev_priv, port); - - /* - * RKL + TGP PCH is a special case; we effectively choose the hpd_pin - * based on the DDI rather than the PHY (i.e., the last two outputs - * shold be HPD_PORT_{D,E} rather than {C,D}. Note that this differs - * from the behavior of both TGL+TGP and RKL+CMP. - */ - if (IS_ROCKETLAKE(dev_priv) && HAS_PCH_TGP(dev_priv)) - return HPD_PORT_A + port - PORT_A; - - switch (phy) { - case PHY_F: - return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F; - case PHY_A ... PHY_E: - case PHY_G ... 
PHY_I: - return HPD_PORT_A + phy - PHY_A; - default: - MISSING_CASE(phy); - return HPD_NONE; - } + return HPD_PORT_A + port - PORT_A; } #define HPD_STORM_DETECT_PERIOD 1000 @@ -503,7 +482,6 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, * only the one of them (DP) will have ->hpd_pulse(). */ for_each_intel_encoder(&dev_priv->drm, encoder) { - bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder); enum port port = encoder->port; bool long_hpd; @@ -511,7 +489,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!(BIT(pin) & pin_mask)) continue; - if (!has_hpd_pulse) + if (!intel_encoder_has_hpd_pulse(encoder)) continue; long_hpd = long_mask & BIT(pin); diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index b781bf469644..dc1b35559afd 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -571,7 +571,7 @@ bool lspcon_init(struct intel_digital_port *dig_port) return false; } - if (!intel_dp_read_dpcd(dp)) { + if (drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd) != 0) { DRM_ERROR("LSPCON DPCD read failed\n"); return false; } diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 1888611244db..e65c2de522c3 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -456,12 +456,6 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, return 0; } -static enum drm_connector_status -intel_lvds_detect(struct drm_connector *connector, bool force) -{ - return connector_status_connected; -} - /* * Return the list of DDC modes if available, or the BIOS fixed mode otherwise. */ @@ -490,7 +484,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs }; static const struct drm_connector_funcs intel_lvds_connector_funcs = { - .detect = intel_lvds_detect, + .detect = intel_panel_detect, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 4072d7062efd..9f23bac0d792 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -40,8 +40,6 @@ #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" -#define CRC_PMIC_PWM_PERIOD_NS 21333 - void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode) @@ -594,10 +592,10 @@ static u32 bxt_get_backlight(struct intel_connector *connector) static u32 pwm_get_backlight(struct intel_connector *connector) { struct intel_panel *panel = &connector->panel; - int duty_ns; + struct pwm_state state; - duty_ns = pwm_get_duty_cycle(panel->backlight.pwm); - return DIV_ROUND_UP(duty_ns * 100, CRC_PMIC_PWM_PERIOD_NS); + pwm_get_state(panel->backlight.pwm, &state); + return pwm_get_relative_duty_cycle(&state, 100); } static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level) @@ -671,9 +669,9 @@ static void bxt_set_backlight(const struct drm_connector_state *conn_state, u32 static void pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; - int duty_ns = DIV_ROUND_UP(level * CRC_PMIC_PWM_PERIOD_NS, 100); - 
pwm_config(panel->backlight.pwm, duty_ns, CRC_PMIC_PWM_PERIOD_NS); + pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } static void @@ -842,10 +840,8 @@ static void pwm_disable_backlight(const struct drm_connector_state *old_conn_sta struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct intel_panel *panel = &connector->panel; - /* Disable the backlight */ - intel_panel_actually_set_backlight(old_conn_state, 0); - usleep_range(2000, 3000); - pwm_disable(panel->backlight.pwm); + panel->backlight.pwm_state.enabled = false; + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state) @@ -1177,9 +1173,12 @@ static void pwm_enable_backlight(const struct intel_crtc_state *crtc_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; + int level = panel->backlight.level; - pwm_enable(panel->backlight.pwm); - intel_panel_actually_set_backlight(conn_state, panel->backlight.level); + level = intel_panel_compute_brightness(connector, level); + pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); + panel->backlight.pwm_state.enabled = true; + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, @@ -1543,18 +1542,9 @@ static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul); } -static u32 get_backlight_max_vbt(struct intel_connector *connector) +static u16 get_vbt_pwm_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz; - u32 pwm; - - if (!panel->backlight.hz_to_pwm) { - drm_dbg_kms(&dev_priv->drm, - "backlight frequency conversion not supported\n"); - return 0; - } if (pwm_freq_hz) { drm_dbg_kms(&dev_priv->drm, @@ -1567,6 +1557,22 @@ static u32 get_backlight_max_vbt(struct intel_connector *connector) pwm_freq_hz); } + return pwm_freq_hz; +} + +static u32 get_backlight_max_vbt(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u16 pwm_freq_hz = get_vbt_pwm_freq(dev_priv); + u32 pwm; + + if (!panel->backlight.hz_to_pwm) { + drm_dbg_kms(&dev_priv->drm, + "backlight frequency conversion not supported\n"); + return 0; + } + pwm = panel->backlight.hz_to_pwm(connector, pwm_freq_hz); if (!pwm) { drm_dbg_kms(&dev_priv->drm, @@ -1891,8 +1897,7 @@ static int pwm_setup_backlight(struct intel_connector *connector, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_panel *panel = &connector->panel; const char *desc; - u32 level, ns; - int retval; + u32 level; /* Get the right PWM chip for DSI backlight according to VBT */ if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { @@ -1910,30 +1915,28 @@ static int pwm_setup_backlight(struct intel_connector *connector, return -ENODEV; } - /* - * FIXME: pwm_apply_args() should be removed when switching to - * the atomic PWM API. 
- */ - pwm_apply_args(panel->backlight.pwm); - - panel->backlight.min = 0; /* 0% */ panel->backlight.max = 100; /* 100% */ - level = intel_panel_compute_brightness(connector, 100); - ns = DIV_ROUND_UP(level * CRC_PMIC_PWM_PERIOD_NS, 100); + panel->backlight.min = get_backlight_min_vbt(connector); - retval = pwm_config(panel->backlight.pwm, ns, CRC_PMIC_PWM_PERIOD_NS); - if (retval < 0) { - drm_err(&dev_priv->drm, "Failed to configure the pwm chip\n"); - pwm_put(panel->backlight.pwm); - panel->backlight.pwm = NULL; - return retval; - } + if (pwm_is_enabled(panel->backlight.pwm)) { + /* PWM is already enabled, use existing settings */ + pwm_get_state(panel->backlight.pwm, &panel->backlight.pwm_state); + + level = pwm_get_relative_duty_cycle(&panel->backlight.pwm_state, + 100); + level = intel_panel_compute_brightness(connector, level); + panel->backlight.level = clamp(level, panel->backlight.min, + panel->backlight.max); + panel->backlight.enabled = true; - level = DIV_ROUND_UP_ULL(pwm_get_duty_cycle(panel->backlight.pwm) * 100, - CRC_PMIC_PWM_PERIOD_NS); - panel->backlight.level = - intel_panel_compute_brightness(connector, level); - panel->backlight.enabled = panel->backlight.level != 0; + drm_dbg_kms(&dev_priv->drm, "PWM already enabled at freq %ld, VBT freq %d, level %d\n", + NSEC_PER_SEC / (unsigned long)panel->backlight.pwm_state.period, + get_vbt_pwm_freq(dev_priv), level); + } else { + /* Set period from VBT frequency, leave other settings at 0. */ + panel->backlight.pwm_state.period = + NSEC_PER_SEC / get_vbt_pwm_freq(dev_priv); + } drm_info(&dev_priv->drm, "Using %s PWM for LCD backlight control\n", desc); @@ -2092,6 +2095,17 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) } } +enum drm_connector_status +intel_panel_detect(struct drm_connector *connector, bool force) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + + return connector_status_connected; +} + int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, struct drm_display_mode *downclock_mode) diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h index 968b95281cb4..5b813fe90557 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.h +++ b/drivers/gpu/drm/i915/display/intel_panel.h @@ -23,6 +23,8 @@ int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, struct drm_display_mode *downclock_mode); void intel_panel_fini(struct intel_panel *panel); +enum drm_connector_status +intel_panel_detect(struct drm_connector *connector, bool force); void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode); int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index bf9e320c547d..8a9d0bdde1bf 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -553,6 +553,22 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FAST_WAKE(7); } + if (dev_priv->psr.psr2_sel_fetch_enabled) { + /* WA 1408330847 */ + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0) || + IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0)) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + DIS_RAM_BYPASS_PSR2_MAN_TRACK, + DIS_RAM_BYPASS_PSR2_MAN_TRACK); + + intel_de_write(dev_priv, + PSR2_MAN_TRK_CTL(dev_priv->psr.transcoder), + 
PSR2_MAN_TRK_CTL_ENABLE); + } else if (HAS_PSR2_SEL_FETCH(dev_priv)) { + intel_de_write(dev_priv, + PSR2_MAN_TRK_CTL(dev_priv->psr.transcoder), 0); + } + /* * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. @@ -663,6 +679,38 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp, crtc_state->dc3co_exitline = crtc_vdisplay - exit_scanlines; } +static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i; + + if (!dev_priv->params.enable_psr2_sel_fetch) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, disabled by parameter\n"); + return false; + } + + if (crtc_state->uapi.async_flip) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, async flip enabled\n"); + return false; + } + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + if (plane_state->uapi.rotation != DRM_MODE_ROTATE_0) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, plane rotated\n"); + return false; + } + } + + return crtc_state->enable_psr2_sel_fetch = true; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -732,22 +780,17 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * Some platforms lack PSR2 HW tracking and instead require manual - * tracking by software. In this case, the driver is required to track - * the areas that need updates and program hardware to send selective - * updates. - * - * So until the software tracking is implemented, PSR2 needs to be - * disabled for platforms without PSR2 HW tracking. - */ - if (!HAS_PSR_HW_TRACKING(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, - "No PSR2 HW tracking in the platform\n"); - return false; + if (HAS_PSR2_SEL_FETCH(dev_priv)) { + if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && + !HAS_PSR_HW_TRACKING(dev_priv)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, selective fetch not valid and no HW tracking available\n"); + return false; + } } - if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + if (!crtc_state->enable_psr2_sel_fetch && + (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v)) { drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", crtc_hdisplay, crtc_vdisplay, @@ -898,6 +941,11 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, val |= EXITLINE_ENABLE; intel_de_write(dev_priv, EXITLINE(cpu_transcoder), val); } + + if (HAS_PSR_HW_TRACKING(dev_priv)) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, IGNORE_PSR2_HW_TRACKING, + dev_priv->psr.psr2_sel_fetch_enabled ? 
+ IGNORE_PSR2_HW_TRACKING : 0); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, @@ -919,6 +967,7 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, /* DC5/DC6 requires at least 6 idle frames */ val = usecs_to_jiffies(intel_get_frame_time_us(crtc_state) * 6); dev_priv->psr.dc3co_exit_delay = val; + dev_priv->psr.psr2_sel_fetch_enabled = crtc_state->enable_psr2_sel_fetch; /* * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR @@ -1058,6 +1107,13 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) psr_status_mask, 2000)) drm_err(&dev_priv->drm, "Timed out waiting PSR idle state\n"); + /* WA 1408330847 */ + if (dev_priv->psr.psr2_sel_fetch_enabled && + (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0) || + IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0))) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0); + /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -1115,6 +1171,32 @@ static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) intel_psr_exit(dev_priv); } +void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct i915_psr *psr = &dev_priv->psr; + + if (!HAS_PSR2_SEL_FETCH(dev_priv) || + !crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(psr->transcoder), + crtc_state->psr2_man_track_ctl); +} + +void intel_psr2_sel_fetch_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + crtc_state->psr2_man_track_ctl = PSR2_MAN_TRK_CTL_ENABLE | + PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; +} + /** * intel_psr_update - Update PSR state * @intel_dp: Intel DP diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index b4515186d5f4..6a83c8e682e6 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -13,6 +13,8 @@ struct drm_connector_state; struct drm_i915_private; struct intel_crtc_state; struct intel_dp; +struct intel_crtc; +struct intel_atomic_state; #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) void intel_psr_init_dpcd(struct intel_dp *intel_dp); @@ -43,5 +45,8 @@ void intel_psr_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp); +void intel_psr2_sel_fetch_update(struct intel_atomic_state *state, + struct intel_crtc *crtc); +void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 5e9fb349c829..4eaa4aa86ecd 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2084,14 +2084,18 @@ intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo, static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector, bool force) { - u16 response; + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct 
intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; + u16 response; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ATTACHED_DISPLAYS, &response, 2)) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index c89f5f7ccb06..63040cb0d4e1 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1626,8 +1626,7 @@ static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, hscale = drm_rect_calc_hscale(&plane_state->uapi.src, &plane_state->uapi.dst, 0, INT_MAX); - if (hscale < 0x10000) - return pixel_rate; + hscale = max(hscale, 0x10000u); /* Decimation steps at 2x,4x,8x,16x */ decimate = ilog2(hscale >> 16); @@ -1640,8 +1639,8 @@ static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, limit -= decimate; /* -10% for RGB */ - if (fb->format->cpp[0] >= 4) - limit--; /* -10% for RGB */ + if (!fb->format->is_yuv) + limit--; /* * We should also do -10% if sprite scaling is enabled @@ -2843,8 +2842,9 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, static bool gen12_plane_supports_mc_ccs(struct drm_i915_private *dev_priv, enum plane_id plane_id) { - /* Wa_14010477008:tgl[a0..c0] */ - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_C0)) + /* Wa_14010477008:tgl[a0..c0],rkl[all] */ + if (IS_ROCKETLAKE(dev_priv) || + IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_C0)) return false; return plane_id < PLANE_SPRITE4; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 777032d9697b..7a7b99b015a5 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1706,6 +1706,9 @@ intel_tv_detect(struct drm_connector *connector, drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, force); + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (force) { struct intel_load_detect_pipe tmp; int ret; diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 6faabd4f6d49..54bcc6a6947c 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -293,8 +293,12 @@ struct bdb_general_features { #define DVO_PORT_HDMIE 12 /* 193 */ #define DVO_PORT_DPF 13 /* N/A */ #define DVO_PORT_HDMIF 14 /* N/A */ -#define DVO_PORT_DPG 15 -#define DVO_PORT_HDMIG 16 +#define DVO_PORT_DPG 15 /* 217 */ +#define DVO_PORT_HDMIG 16 /* 217 */ +#define DVO_PORT_DPH 17 /* 217 */ +#define DVO_PORT_HDMIH 18 /* 217 */ +#define DVO_PORT_DPI 19 /* 217 */ +#define DVO_PORT_HDMII 20 /* 217 */ #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -330,6 +334,8 @@ enum vbt_gmbus_ddi { #define DP_AUX_E 0x50 #define DP_AUX_F 0x60 #define DP_AUX_G 0x70 +#define DP_AUX_H 0x80 +#define DP_AUX_I 0x90 #define VBT_DP_MAX_LINK_RATE_HBR3 0 #define VBT_DP_MAX_LINK_RATE_HBR2 1 diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 052e0b31a2da..5e5522923b1e 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1585,6 +1585,7 @@ static const struct drm_connector_helper_funcs 
intel_dsi_connector_helper_funcs }; static const struct drm_connector_funcs intel_dsi_connector_funcs = { + .detect = intel_panel_detect, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_connector_destroy, diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index d0a514301575..4070b00c3690 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -483,7 +483,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { drm_err(&dev_priv->drm, - "Cant get a suitable ratio from DSI PLL ratios\n"); + "Can't get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; } else drm_dbg_kms(&dev_priv->drm, "DSI PLL calculation is Done!!\n"); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 278664f831e7..272cf3ea68d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - return vm->vma_ops.bind_vma(vm, vma, cache_level, flags); + vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); } static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) @@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work) struct clear_pages_work *w = container_of(work, typeof(*w), work); struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; struct i915_request *rq; struct i915_vma *batch; int err = w->dma.error; @@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (unlikely(err)) + i915_gem_ww_ctx_init(&ww, false); + intel_engine_pm_get(w->ce->engine); +retry: + err = intel_context_pin_ww(w->ce, &ww); + if (err) goto out_signal; - batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto out_unpin; + goto out_ctx; } - rq = intel_context_create_request(w->ce); + rq = i915_request_create(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -224,9 +229,19 @@ out_request: i915_request_add(rq); out_batch: intel_emit_vma_release(w->ce, batch); -out_unpin: - i915_vma_unpin(vma); +out_ctx: + intel_context_unpin(w->ce); out_signal: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + i915_vma_unpin(w->sleeve->vma); + intel_engine_pm_put(w->ce->engine); + if (unlikely(err)) { dma_fence_set_error(&w->dma, err); dma_fence_signal(&w->dma); @@ -234,6 +249,44 @@ out_signal: } } +static int pin_wait_clear_pages_work(struct clear_pages_work *w, + struct intel_context *ce) +{ + struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err) + goto out; + + err = 
i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out; + + err = i915_sw_fence_await_reservation(&w->wait, + vma->obj->base.resv, NULL, + true, 0, I915_FENCE_GFP); + if (err) + goto err_unpin_vma; + + dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); + +err_unpin_vma: + if (err) + i915_vma_unpin(vma); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + return err; +} + static int __i915_sw_fence_call clear_pages_work_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) @@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); - i915_gem_object_lock(obj); - err = i915_sw_fence_await_reservation(&work->wait, - obj->base.resv, NULL, true, 0, - I915_FENCE_GFP); - if (err < 0) { + err = pin_wait_clear_pages_work(work, ce); + if (err < 0) dma_fence_set_error(&work->dma, err); - } else { - dma_resv_add_excl_fence(obj->base.resv, &work->dma); - err = 0; - } - i915_gem_object_unlock(obj); dma_fence_get(&work->dma); i915_sw_fence_commit(&work->wait); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ef755dd5e68f..4fd38101bb56 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -390,24 +390,6 @@ __context_engines_static(const struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->engines, true); } -static bool __reset_engine(struct intel_engine_cs *engine) -{ - struct intel_gt *gt = engine->gt; - bool success = false; - - if (!intel_has_reset_engine(gt)) - return false; - - if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags)) { - success = intel_engine_reset(engine, NULL) == 0; - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, - >->reset.flags); - } - - return success; -} - static void __reset_context(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -431,12 +413,7 @@ static bool __cancel_engine(struct intel_engine_cs *engine) * kill the banned context, we fallback to doing a local reset * instead. */ - if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && - !intel_engine_pulse(engine)) - return true; - - /* If we are unable to send a pulse, try resetting this engine. */ - return __reset_engine(engine); + return intel_engine_pulse(engine) == 0; } static bool @@ -460,8 +437,8 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active) spin_lock(&locked->active.lock); } - if (!i915_request_completed(rq)) { - if (i915_request_is_active(rq) && rq->fence.error != -EIO) + if (i915_request_is_active(rq)) { + if (!i915_request_completed(rq)) *active = locked; ret = true; } @@ -479,13 +456,26 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; + /* + * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference + * to the request to prevent it being transferred to a new timeline + * (and onto a new timeline->requests list). + */ rcu_read_lock(); - list_for_each_entry_rcu(rq, &ce->timeline->requests, link) { - if (i915_request_is_active(rq) && i915_request_completed(rq)) - continue; + list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { + bool found; + + /* timeline is already completed upto this point? 
*/ + if (!i915_request_get_rcu(rq)) + break; /* Check with the backend if the request is inflight */ - if (__active_engine(rq, &engine)) + found = true; + if (likely(rcu_access_pointer(rq->timeline) == ce->timeline)) + found = __active_engine(rq, &engine); + + i915_request_put(rq); + if (found) break; } rcu_read_unlock(); @@ -493,7 +483,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) return engine; } -static void kill_engines(struct i915_gem_engines *engines) +static void kill_engines(struct i915_gem_engines *engines, bool ban) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -508,7 +498,7 @@ static void kill_engines(struct i915_gem_engines *engines) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (intel_context_set_banned(ce)) + if (ban && intel_context_set_banned(ce)) continue; /* @@ -521,7 +511,7 @@ static void kill_engines(struct i915_gem_engines *engines) engine = active_engine(ce); /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine)) + if (engine && !__cancel_engine(engine) && ban) /* * If we are unable to send a preemptive pulse to bump * the context from the GPU, we have to resort to a full @@ -531,8 +521,10 @@ static void kill_engines(struct i915_gem_engines *engines) } } -static void kill_stale_engines(struct i915_gem_context *ctx) +static void kill_context(struct i915_gem_context *ctx) { + bool ban = (!i915_gem_context_is_persistent(ctx) || + !ctx->i915->params.enable_hangcheck); struct i915_gem_engines *pos, *next; spin_lock_irq(&ctx->stale.lock); @@ -545,7 +537,7 @@ static void kill_stale_engines(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); - kill_engines(pos); + kill_engines(pos, ban); spin_lock_irq(&ctx->stale.lock); GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); @@ -557,11 +549,6 @@ static void kill_stale_engines(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); } -static void kill_context(struct i915_gem_context *ctx) -{ - kill_stale_engines(ctx); -} - static void engines_idle_release(struct i915_gem_context *ctx, struct i915_gem_engines *engines) { @@ -596,7 +583,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, kill: if (list_empty(&engines->link)) /* raced, already closed */ - kill_engines(engines); + kill_engines(engines, true); i915_sw_fence_commit(&engines->fence); } @@ -654,9 +641,7 @@ static void context_close(struct i915_gem_context *ctx) * case we opt to forcibly kill off all remaining requests on * context close. 
*/ - if (!i915_gem_context_is_persistent(ctx) || - !ctx->i915->params.enable_hangcheck) - kill_context(ctx); + kill_context(ctx); i915_gem_context_put(ctx); } @@ -892,7 +877,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&i915->gt, NULL); + timeline = intel_timeline_create(&i915->gt); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -1106,6 +1091,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); static int context_barrier_task(struct i915_gem_context *ctx, intel_engine_mask_t engines, bool (*skip)(struct intel_context *ce, void *data), + int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), int (*emit)(struct i915_request *rq, void *data), void (*task)(void *data), void *data) @@ -1113,6 +1099,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct i915_gem_engines *e; + struct i915_gem_ww_ctx ww; struct intel_context *ce; int err = 0; @@ -1150,10 +1137,21 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (skip && skip(ce, data)) continue; - rq = intel_context_create_request(ce); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err; + + if (pin) + err = pin(ce, &ww, data); + if (err) + goto err_unpin; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + goto err_unpin; } err = 0; @@ -1163,6 +1161,16 @@ static int context_barrier_task(struct i915_gem_context *ctx, err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); +err_unpin: + intel_context_unpin(ce); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (err) break; } @@ -1218,6 +1226,17 @@ static void set_ppgtt_barrier(void *data) i915_vm_close(old); } +static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) +{ + struct i915_address_space *vm = ce->vm; + + if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) + /* ppGTT is not part of the legacy context image */ + return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); + + return 0; +} + static int emit_ppgtt_update(struct i915_request *rq, void *data) { struct i915_address_space *vm = rq->context->vm; @@ -1274,20 +1293,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - return true; - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return false; - - if (!atomic_read(&ce->pin_count)) - return true; - - /* ppGTT is not part of the legacy context image */ - if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) - return true; - - return false; + return !ce->state; + else + return !atomic_read(&ce->pin_count); } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1338,6 +1347,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, */ err = context_barrier_task(ctx, ALL_ENGINES, skip_ppgtt_update, + pin_ppgtt_update, emit_ppgtt_update, set_ppgtt_barrier, old); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 2679380159fc..8dd295dbe241 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -48,12 
+48,9 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme src = sg_next(src); } - if (!dma_map_sg_attrs(attachment->dev, - st->sgl, st->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC)) { - ret = -ENOMEM; + ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC); + if (ret) goto err_free_sg; - } return st; @@ -73,9 +70,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, { struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sg_attrs(attachment->dev, - sg->sgl, sg->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); @@ -128,7 +123,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; @@ -149,7 +144,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 7f76fc68f498..7c90a63c273d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_framebuffer(obj)) return; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); __i915_gem_object_flush_for_display(obj); i915_gem_object_unlock(obj); } +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) +{ + if (i915_gem_object_is_framebuffer(obj)) + __i915_gem_object_flush_for_display(obj); +} + /** * Moves a single object to the WC read, and possibly write domain. * @obj: object to act on @@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; - /* Always invalidate stale cachelines */ if (obj->cache_level != cache_level) { i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; } - i915_gem_object_unlock(obj); - /* The cache-level will be applied when each vma is rebound. */ return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | @@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; + ret = i915_gem_object_set_cache_level(obj, level); + i915_gem_object_unlock(obj); out: i915_gem_object_put(obj); @@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_gem_ww_ctx ww; struct i915_vma *vma; int ret; @@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); + if (ret) + goto err; /* * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); if (ret) - return ERR_PTR(ret); + goto err; /* * As the user may map the buffer once pinned in the display plane @@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && (!view || view->type == I915_GGTT_VIEW_NORMAL)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, - flags | - PIN_MAPPABLE | - PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); - if (IS_ERR(vma)) - return vma; + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment, + flags | PIN_MAPPABLE | + PIN_NONBLOCK); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, + alignment, flags); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - i915_gem_object_flush_if_display(obj); + i915_gem_object_flush_if_display_locked(obj); + +err: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (ret) + return ERR_PTR(ret); return vma; } @@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out_unpin; @@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -616,8 +638,6 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } @@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -680,7 +698,5 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 446e76e95c38..4b09bcd70cf4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -26,6 +26,7 @@ #include "i915_gem_ioctls.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" +#include "i915_user_extensions.h" struct eb_vma { struct i915_vma *vma; @@ -40,11 +41,6 @@ struct eb_vma { u32 handle; }; -struct eb_vma_array { - struct kref kref; - struct eb_vma vma[]; -}; - enum { FORCE_CPU_RELOC = 1, FORCE_GTT_RELOC, @@ -57,9 +53,11 @@ enum { #define __EXEC_OBJECT_NEEDS_MAP BIT(29) #define __EXEC_OBJECT_NEEDS_BIAS BIT(28) #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 
28) /* all of the above */ +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) -#define __EXEC_INTERNAL_FLAGS (~0u << 31) +#define __EXEC_ENGINE_PINNED BIT(30) +#define __EXEC_INTERNAL_FLAGS (~0u << 30) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -229,6 +227,13 @@ enum { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. */ +struct eb_fence { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -253,6 +258,8 @@ struct i915_execbuffer { /** list of vma that have execobj.relocation_count */ struct list_head relocs; + struct i915_gem_ww_ctx ww; + /** * Track the most recently used object for relocations, as we * frequently have to perform multiple relocations within the same @@ -268,19 +275,22 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct i915_vma *target; struct i915_request *rq; - struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; + struct intel_gt_buffer_pool_node *pool; } reloc_cache; + struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ + struct intel_context *reloc_context; + u64 invalid_flags; /** Set of execobj.flags that are invalid */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u32 batch_start_offset; /** Location within object of batch */ u32 batch_len; /** Length of batch within object */ u32 batch_flags; /** Flags composed for emit_bb_start() */ + struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ /** * Indicate either the size of the hastable used to resolve @@ -289,9 +299,16 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ - struct eb_vma_array *array; + + struct eb_fence *fences; + unsigned long num_fences; }; +static int eb_parse(struct i915_execbuffer *eb); +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, + bool throttle); +static void eb_unpin_engine(struct i915_execbuffer *eb); + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || @@ -299,62 +316,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) eb->args->batch_len); } -static struct eb_vma_array *eb_vma_array_create(unsigned int count) -{ - struct eb_vma_array *arr; - - arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN); - if (!arr) - return NULL; - - kref_init(&arr->kref); - arr->vma[0].vma = NULL; - - return arr; -} - -static inline void eb_unreserve_vma(struct eb_vma *ev) -{ - struct i915_vma *vma = ev->vma; - - if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - if (ev->flags & __EXEC_OBJECT_HAS_PIN) - __i915_vma_unpin(vma); - - ev->flags &= ~(__EXEC_OBJECT_HAS_PIN | - __EXEC_OBJECT_HAS_FENCE); -} - -static void eb_vma_array_destroy(struct kref *kref) -{ - struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref); - struct eb_vma *ev = arr->vma; - - while (ev->vma) { - eb_unreserve_vma(ev); - i915_vma_put(ev->vma); - ev++; - } - - kvfree(arr); -} - -static void eb_vma_array_put(struct eb_vma_array *arr) -{ - kref_put(&arr->kref, eb_vma_array_destroy); -} - static int eb_create(struct 
i915_execbuffer *eb) { - /* Allocate an extra slot for use by the command parser + sentinel */ - eb->array = eb_vma_array_create(eb->buffer_count + 2); - if (!eb->array) - return -ENOMEM; - - eb->vma = eb->array->vma; - if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { unsigned int size = 1 + ilog2(eb->buffer_count); @@ -388,10 +351,8 @@ static int eb_create(struct i915_execbuffer *eb) break; } while (--size); - if (unlikely(!size)) { - eb_vma_array_put(eb->array); + if (unlikely(!size)) return -ENOMEM; - } eb->lut_size = size; } else { @@ -475,16 +436,17 @@ eb_pin_vma(struct i915_execbuffer *eb, pin_flags |= PIN_GLOBAL; /* Attempt to reuse the current location if available */ - if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) { + /* TODO: Add -EDEADLK handling here */ + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) { if (entry->flags & EXEC_OBJECT_PINNED) return false; /* Failing that pick any _free_ space if suitable */ - if (unlikely(i915_vma_pin(vma, - entry->pad_to_size, - entry->alignment, - eb_pin_flags(entry, ev->flags) | - PIN_USER | PIN_NOEVICT))) + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, + entry->pad_to_size, + entry->alignment, + eb_pin_flags(entry, ev->flags) | + PIN_USER | PIN_NOEVICT))) return false; } @@ -502,6 +464,19 @@ eb_pin_vma(struct i915_execbuffer *eb, return !eb_vma_misplaced(entry, vma, ev->flags); } +static inline void +eb_unreserve_vma(struct eb_vma *ev) +{ + if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) + return; + + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(ev->vma); + + __i915_vma_unpin(ev->vma); + ev->flags &= ~__EXEC_OBJECT_RESERVED; +} + static int eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, @@ -593,16 +568,6 @@ eb_add_vma(struct i915_execbuffer *eb, eb->batch = ev; } - - if (eb_pin_vma(eb, entry, ev)) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - } else { - eb_unreserve_vma(ev); - list_add_tail(&ev->bind_link, &eb->unbound); - } } static inline int use_cpu_reloc(const struct reloc_cache *cache, @@ -622,7 +587,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache, obj->cache_level != I915_CACHE_NONE); } -static int eb_reserve_vma(const struct i915_execbuffer *eb, +static int eb_reserve_vma(struct i915_execbuffer *eb, struct eb_vma *ev, u64 pin_flags) { @@ -637,7 +602,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, return err; } - err = i915_vma_pin(vma, + err = i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | pin_flags); if (err) @@ -687,10 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb) * This avoid unnecessary unbinding of later objects in order to make * room for the earlier objects *unless* we need to defragment. 
*/ - - if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) - return -EINTR; - pass = 0; do { list_for_each_entry(ev, &eb->unbound, bind_link) { @@ -698,8 +659,8 @@ static int eb_reserve(struct i915_execbuffer *eb) if (err) break; } - if (!(err == -ENOSPC || err == -EAGAIN)) - break; + if (err != -ENOSPC) + return err; /* Resort *all* the objects into priority order */ INIT_LIST_HEAD(&eb->unbound); @@ -729,13 +690,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } list_splice_tail(&last, &eb->unbound); - if (err == -EAGAIN) { - mutex_unlock(&eb->i915->drm.struct_mutex); - flush_workqueue(eb->i915->mm.userptr_wq); - mutex_lock(&eb->i915->drm.struct_mutex); - continue; - } - switch (pass++) { case 0: break; @@ -746,20 +700,15 @@ static int eb_reserve(struct i915_execbuffer *eb) err = i915_gem_evict_vm(eb->context->vm); mutex_unlock(&eb->context->vm->mutex); if (err) - goto unlock; + return err; break; default: - err = -ENOSPC; - goto unlock; + return -ENOSPC; } pin_flags = PIN_USER; } while (1); - -unlock: - mutex_unlock(&eb->i915->drm.struct_mutex); - return err; } static unsigned int eb_batch_index(const struct i915_execbuffer *eb) @@ -882,12 +831,12 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) static int eb_lookup_vmas(struct i915_execbuffer *eb) { + struct drm_i915_private *i915 = eb->i915; unsigned int batch = eb_batch_index(eb); unsigned int i; int err = 0; INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); for (i = 0; i < eb->buffer_count; i++) { struct i915_vma *vma; @@ -895,22 +844,83 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) vma = eb_lookup_vma(eb, eb->exec[i].handle); if (IS_ERR(vma)) { err = PTR_ERR(vma); - break; + goto err; } err = eb_validate_vma(eb, &eb->exec[i], vma); if (unlikely(err)) { i915_vma_put(vma); - break; + goto err; } eb_add_vma(eb, i, batch, vma); } + if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { + drm_dbg(&i915->drm, + "Attempting to use self-modifying batch buffer\n"); + return -EINVAL; + } + + if (range_overflows_t(u64, + eb->batch_start_offset, eb->batch_len, + eb->batch->vma->size)) { + drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); + return -EINVAL; + } + + if (eb->batch_len == 0) + eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; + + return 0; + +err: eb->vma[i].vma = NULL; return err; } +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + err = i915_gem_object_lock(vma->obj, &eb->ww); + if (err) + return err; + + if (eb_pin_vma(eb, entry, ev)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(ev); + + list_add_tail(&ev->bind_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) { + err = i915_vma_unbind(vma); + if (err) + return err; + } + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); + } + + if (!list_empty(&eb->unbound)) + return eb_reserve(eb); + + return 0; +} + static struct eb_vma * eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) { @@ -931,13 +941,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) +{ + const 
unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + if (!vma) + break; + + eb_unreserve_vma(ev); + + if (final) + i915_vma_put(vma); + } + + eb_unpin_engine(eb); +} + static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->array) - eb_vma_array_put(eb->array); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -949,6 +977,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } +static void reloc_cache_clear(struct reloc_cache *cache) +{ + cache->rq = NULL; + cache->rq_cmd = NULL; + cache->pool = NULL; + cache->rq_size = 0; +} + static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -961,8 +997,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->rq = NULL; - cache->target = NULL; + reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -984,132 +1019,60 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -#define RELOC_TAIL 4 - -static int reloc_gpu_chain(struct reloc_cache *cache) +static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) { - struct intel_gt_buffer_pool_node *pool; - struct i915_request *rq = cache->rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_pool; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto out_pool; - - GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); - cmd = cache->rq_cmd + cache->rq_size; - *cmd++ = MI_ARB_CHECK; - if (cache->gen >= 8) - *cmd++ = MI_BATCH_BUFFER_START_GEN8; - else if (cache->gen >= 6) - *cmd++ = MI_BATCH_BUFFER_START; - else - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(batch->node.start); - *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ - i915_gem_object_flush_map(cache->rq_vma->obj); - i915_gem_object_unpin_map(cache->rq_vma->obj); - cache->rq_vma = NULL; - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err == 0) { - i915_vma_lock(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - } - i915_vma_unpin(batch); - if (err) - goto out_pool; - - cmd = i915_gem_object_pin_map(batch->obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_pool; - } - - /* Return with batch mapping (cmd) still pinned */ - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->rq_vma = batch; - -out_pool: - intel_gt_buffer_pool_put(pool); - return err; -} + if (!cache->pool) + return; -static unsigned int reloc_bb_flags(const struct reloc_cache *cache) -{ - return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE; + /* + * This is a bit nasty, normally we keep objects locked until the end + * of execbuffer, but we already submit this, and have to unlock before + * dropping the reference. 
Fortunately we can only hold 1 pool node at + * a time, so this should be harmless. + */ + i915_gem_ww_unlock_single(cache->pool->obj); + intel_gt_buffer_pool_put(cache->pool); + cache->pool = NULL; } -static int reloc_gpu_flush(struct reloc_cache *cache) +static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) { - struct i915_request *rq; - int err; + struct drm_i915_gem_object *obj = cache->rq->batch->obj; - rq = fetch_and_zero(&cache->rq); - if (!rq) - return 0; + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - if (cache->rq_vma) { - struct drm_i915_gem_object *obj = cache->rq_vma->obj; + __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_unpin_map(obj); - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(cache->rq->engine->gt); - __i915_gem_object_flush_map(obj, - 0, sizeof(u32) * cache->rq_size); - i915_gem_object_unpin_map(obj); - } + i915_request_add(cache->rq); + reloc_cache_put_pool(eb, cache); + reloc_cache_clear(cache); - err = 0; - if (rq->engine->emit_init_breadcrumb) - err = rq->engine->emit_init_breadcrumb(rq); - if (!err) - err = rq->engine->emit_bb_start(rq, - rq->batch->node.start, - PAGE_SIZE, - reloc_bb_flags(cache)); - if (err) - i915_request_set_error_once(rq, err); - - intel_gt_chipset_flush(rq->engine->gt); - i915_request_add(rq); - - return err; + eb->reloc_pool = NULL; } -static void reloc_cache_reset(struct reloc_cache *cache) +static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; + if (cache->rq) + reloc_gpu_flush(eb, cache); + if (!cache->vaddr) return; vaddr = unmask_page(cache->vaddr); if (cache->vaddr & KMAP) { + struct drm_i915_gem_object *obj = + (struct drm_i915_gem_object *)cache->node.mm; if (cache->vaddr & CLFLUSH_AFTER) mb(); kunmap_atomic(vaddr); - i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); + i915_gem_object_finish_access(obj); } else { struct i915_ggtt *ggtt = cache_to_ggtt(cache); @@ -1134,9 +1097,10 @@ static void reloc_cache_reset(struct reloc_cache *cache) static void *reloc_kmap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, - unsigned long page) + unsigned long pageno) { void *vaddr; + struct page *page; if (cache->vaddr) { kunmap_atomic(unmask_page(cache->vaddr)); @@ -1157,17 +1121,22 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, mb(); } - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + page = i915_gem_object_get_page(obj, pageno); + if (!obj->mm.dirty) + set_page_dirty(page); + + vaddr = kmap_atomic(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; - cache->page = page; + cache->page = pageno; return vaddr; } static void *reloc_iomap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, + struct i915_execbuffer *eb, unsigned long page) { + struct reloc_cache *cache = &eb->reloc_cache; struct i915_ggtt *ggtt = cache_to_ggtt(cache); unsigned long offset; void *vaddr; @@ -1185,16 +1154,17 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); if (err) return ERR_PTR(err); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); + vma = 
i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (vma == ERR_PTR(-EDEADLK)) + return vma; + if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); mutex_lock(&ggtt->vm.mutex); @@ -1230,9 +1200,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, } static void *reloc_vaddr(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, + struct i915_execbuffer *eb, unsigned long page) { + struct reloc_cache *cache = &eb->reloc_cache; void *vaddr; if (cache->page == page) { @@ -1240,7 +1211,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj, } else { vaddr = NULL; if ((cache->vaddr & KMAP) == 0) - vaddr = reloc_iomap(obj, cache, page); + vaddr = reloc_iomap(obj, eb, page); if (!vaddr) vaddr = reloc_kmap(obj, cache, page); } @@ -1276,7 +1247,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) struct drm_i915_gem_object *obj = vma->obj; int err; - i915_vma_lock(vma); + assert_vma_held(vma); if (obj->cache_dirty & ~obj->cache_coherent) i915_gem_clflush_object(obj, 0); @@ -1286,25 +1257,31 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - return err; } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine, + struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool; + struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + eb->reloc_pool = NULL; + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err_pool; cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? 
@@ -1312,35 +1289,42 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, I915_MAP_FORCE_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_pool; + goto err_pool; } - batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; } - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); if (err) goto err_unmap; if (engine == eb->context->engine) { rq = i915_request_create(eb->context); } else { - struct intel_context *ce; + struct intel_context *ce = eb->reloc_context; - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; + if (!ce) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_unpin; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + eb->reloc_context = ce; } - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); + err = intel_context_pin_ww(ce, &eb->ww); + if (err) + goto err_unpin; - rq = intel_context_create_request(ce); - intel_context_put(ce); + rq = i915_request_create(ce); + intel_context_unpin(ce); } if (IS_ERR(rq)) { err = PTR_ERR(rq); @@ -1351,11 +1335,20 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - i915_vma_lock(batch); + err = reloc_move_to_gpu(rq, vma); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + if (err) + goto skip_request; + + assert_vma_held(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; @@ -1365,10 +1358,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; - cache->rq_vma = batch; + cache->pool = pool; /* Return with batch mapping (cmd) still pinned */ - goto out_pool; + return 0; skip_request: i915_request_set_error_once(rq, err); @@ -1378,8 +1371,8 @@ err_unpin: i915_vma_unpin(batch); err_unmap: i915_gem_object_unpin_map(pool->obj); -out_pool: - intel_gt_buffer_pool_put(pool); +err_pool: + eb->reloc_pool = pool; return err; } @@ -1394,9 +1387,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - int err; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(eb, cache); if (unlikely(!cache->rq)) { + int err; struct intel_engine_cs *engine = eb->engine; if (!reloc_can_use_engine(engine)) { @@ -1405,31 +1401,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return ERR_PTR(-ENODEV); } - err = __reloc_gpu_alloc(eb, engine, len); + err = __reloc_gpu_alloc(eb, engine, vma, len); if (unlikely(err)) return ERR_PTR(err); } - if (vma != cache->target) { - err = reloc_move_to_gpu(cache->rq, vma); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - - cache->target = vma; - } - - if (unlikely(cache->rq_size + len > - PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { - err = reloc_gpu_chain(cache); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - } - - GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; @@ -1461,7 +1437,7 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 
offset) return addr + offset_in_page(offset); } -static bool __reloc_entry_gpu(struct i915_execbuffer *eb, +static int __reloc_entry_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, u64 offset, u64 target_addr) @@ -1479,7 +1455,9 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, len = 3; batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) + if (batch == ERR_PTR(-EDEADLK)) + return -EDEADLK; + else if (IS_ERR(batch)) return false; addr = gen8_canonical_addr(vma->node.start + offset); @@ -1532,7 +1510,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, return true; } -static bool reloc_entry_gpu(struct i915_execbuffer *eb, +static int reloc_entry_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, u64 offset, u64 target_addr) @@ -1554,14 +1532,17 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; + int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + if (reloc_gpu < 0) + return reloc_gpu; + + if (!reloc_gpu) { bool wide = eb->reloc_cache.use_64bit_reloc; void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, - &eb->reloc_cache, + vaddr = reloc_vaddr(vma->obj, eb, offset >> PAGE_SHIFT); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -1712,7 +1693,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * we would try to acquire the struct mutex again. Obviously * this is bad and so lockdep complains vehemently. */ - copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); + pagefault_disable(); + copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); + pagefault_enable(); if (unlikely(copied)) { remain = -EFAULT; goto out; @@ -1756,74 +1739,400 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) urelocs += ARRAY_SIZE(stack); } while (remain); out: - reloc_cache_reset(&eb->reloc_cache); + reloc_cache_reset(&eb->reloc_cache, eb); return remain; } -static int eb_relocate(struct i915_execbuffer *eb) +static int +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) { + const struct drm_i915_gem_exec_object2 *entry = ev->exec; + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; int err; - err = eb_lookup_vmas(eb); - if (err) - return err; + for (i = 0; i < entry->relocation_count; i++) { + u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); + + if ((s64)offset < 0) { + err = (int)offset; + goto err; + } + } + err = 0; +err: + reloc_cache_reset(&eb->reloc_cache, eb); + return err; +} + +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) +{ + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; + + size = entry->relocation_count; + if (size == 0) + return 0; - if (!list_empty(&eb->unbound)) { - err = eb_reserve(eb); + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; + + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); if (err) return err; } + return __get_user(c, end - 1); +} - /* The objects are in their final locations, apply the relocations. 
*/ - if (eb->args->flags & __EXEC_HAS_RELOC) { - struct eb_vma *ev; - int flush; +static int eb_copy_relocations(const struct i915_execbuffer *eb) +{ + struct drm_i915_gem_relocation_entry *relocs; + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; - list_for_each_entry(ev, &eb->relocs, reloc_link) { + for (i = 0; i < count; i++) { + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + unsigned long size; + unsigned long copied; + + if (nreloc == 0) + continue; + + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_KERNEL); + if (!relocs) { + err = -ENOMEM; + goto err; + } + + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char __user *)urelocs + copied, + len)) + goto end; + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. + */ + if (!user_access_begin(urelocs, size)) + goto end; + + for (copied = 0; copied < nreloc; copied++) + unsafe_put_user(-1, + &urelocs[copied].presumed_offset, + end_user); + user_access_end(); + + eb->exec[i].relocs_ptr = (uintptr_t)relocs; + } + + return 0; + +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; +err: + while (i--) { + relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); + if (eb->exec[i].relocation_count) + kvfree(relocs); + } + return err; +} + +static int eb_prefault_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + int err; + + err = check_relocations(&eb->exec[i]); + if (err) + return err; + } + + return 0; +} + +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, + struct i915_request *rq) +{ + bool have_copy = false; + struct eb_vma *ev; + int err = 0; + +repeat: + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + + /* We may process another execbuffer during the unlock... */ + eb_release_vmas(eb, false); + i915_gem_ww_ctx_fini(&eb->ww); + + if (rq) { + /* nonblocking is always false */ + if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + rq = NULL; + + err = -EINTR; + goto err_relock; + } + + i915_request_put(rq); + rq = NULL; + } + + /* + * We take 3 passes through the slowpatch. + * + * 1 - we try to just prefault all the user relocation entries and + * then attempt to reuse the atomic pagefault disabled fast path again. + * + * 2 - we copy the user entries to a local buffer here outside of the + * local and allow ourselves to wait upon any rendering before + * relocations + * + * 3 - we already have a local copy of the relocation entries, but + * were interrupted (EAGAIN) whilst waiting for the objects, try again. 
+ */ + if (!err) { + err = eb_prefault_relocations(eb); + } else if (!have_copy) { + err = eb_copy_relocations(eb); + have_copy = err == 0; + } else { + cond_resched(); + err = 0; + } + + if (!err) + flush_workqueue(eb->i915->mm.userptr_wq); + +err_relock: + i915_gem_ww_ctx_init(&eb->ww, true); + if (err) + goto out; + + /* reacquire the objects */ +repeat_validate: + rq = eb_pin_engine(eb, false); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + goto err; + } + + /* We didn't throttle, should be NULL */ + GEM_WARN_ON(rq); + + err = eb_validate_vmas(eb); + if (err) + goto err; + + GEM_BUG_ON(!eb->batch); + + list_for_each_entry(ev, &eb->relocs, reloc_link) { + if (!have_copy) { + pagefault_disable(); err = eb_relocate_vma(eb, ev); + pagefault_enable(); + if (err) + break; + } else { + err = eb_relocate_vma_slow(eb, ev); if (err) break; } + } + + if (err == -EDEADLK) + goto err; - flush = reloc_gpu_flush(&eb->reloc_cache); + if (err && !have_copy) + goto repeat; + + if (err) + goto err; + + /* as last step, parse the command buffer */ + err = eb_parse(eb); + if (err) + goto err; + + /* + * Leave the user relocations as are, this is the painfully slow path, + * and we want to avoid the complication of dropping the lock whilst + * having buffers reserved in the aperture and so causing spurious + * ENOSPC for random operations. + */ + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) - err = flush; + goto repeat_validate; + } + + if (err == -EAGAIN) + goto repeat; + +out: + if (have_copy) { + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = + &eb->exec[i]; + struct drm_i915_gem_relocation_entry *relocs; + + if (!entry->relocation_count) + continue; + + relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + kvfree(relocs); + } } + if (rq) + i915_request_put(rq); + return err; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +static int eb_relocate_parse(struct i915_execbuffer *eb) { - const unsigned int count = eb->buffer_count; - struct ww_acquire_ctx acquire; - unsigned int i; - int err = 0; + int err; + struct i915_request *rq = NULL; + bool throttle = true; - ww_acquire_init(&acquire, &reservation_ww_class); +retry: + rq = eb_pin_engine(eb, throttle); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + if (err != -EDEADLK) + return err; - for (i = 0; i < count; i++) { - struct eb_vma *ev = &eb->vma[i]; - struct i915_vma *vma = ev->vma; + goto err; + } - err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); - if (err == -EDEADLK) { - GEM_BUG_ON(i == 0); - do { - int j = i - 1; + if (rq) { + bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - ww_mutex_unlock(&eb->vma[j].vma->resv->lock); + /* Need to drop all locks now for throttling, take slowpath */ + err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0); + if (err == -ETIME) { + if (nonblock) { + err = -EWOULDBLOCK; + i915_request_put(rq); + goto err; + } + goto slow; + } + i915_request_put(rq); + rq = NULL; + } - swap(eb->vma[i], eb->vma[j]); - } while (--i); + /* only throttle once, even if we didn't need to throttle */ + throttle = false; - err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, - &acquire); + err = eb_validate_vmas(eb); + if (err == -EAGAIN) + goto slow; + else if (err) + goto err; + + /* The objects are in their final locations, apply the relocations. 
*/ + if (eb->args->flags & __EXEC_HAS_RELOC) { + struct eb_vma *ev; + + list_for_each_entry(ev, &eb->relocs, reloc_link) { + err = eb_relocate_vma(eb, ev); + if (err) + break; } - if (err) - break; + + if (err == -EDEADLK) + goto err; + else if (err) + goto slow; + } + + if (!err) + err = eb_parse(eb); + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); + if (!err) + goto retry; } - ww_acquire_done(&acquire); + + return err; + +slow: + err = eb_relocate_parse_slow(eb, rq); + if (err) + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + eb->args->flags &= ~__EXEC_HAS_RELOC; + + return err; +} + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1868,13 +2177,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (err == 0) err = i915_vma_move_to_active(vma, eb->request, flags); - - i915_vma_unlock(vma); - eb_unreserve_vma(ev); } - ww_acquire_fini(&acquire); - - eb_vma_array_put(fetch_and_zero(&eb->array)); if (unlikely(err)) goto err_skip; @@ -1894,7 +2197,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | + I915_EXEC_USE_EXTENSIONS))) { if (exec->num_cliprects || exec->cliprects_ptr) return -EINVAL; } @@ -1938,7 +2242,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) } static struct i915_vma * -shadow_batch_pin(struct drm_i915_gem_object *obj, +shadow_batch_pin(struct i915_execbuffer *eb, + struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned int flags) { @@ -1949,7 +2254,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; - err = i915_vma_pin(vma, 0, 0, flags); + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags); if (err) return ERR_PTR(err); @@ -1962,8 +2267,8 @@ struct eb_parse_work { struct i915_vma *batch; struct i915_vma *shadow; struct i915_vma *trampoline; - unsigned int batch_offset; - unsigned int batch_length; + unsigned long batch_offset; + unsigned long batch_length; }; static int __eb_parse(struct dma_fence_work *work) @@ -2001,7 +2306,7 @@ __parser_mark_active(struct i915_vma *vma, { struct intel_gt_buffer_pool_node *node = vma->private; - return i915_active_ref(&node->active, tl, fence); + return i915_active_ref(&node->active, tl->fence_context, fence); } static int @@ -2033,6 +2338,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, struct eb_parse_work *pw; int err; + GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); + GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); + pw = kzalloc(sizeof(*pw), GFP_KERNEL); if (!pw) return -ENOMEM; @@ -2065,36 +2373,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (err) goto err_commit; - err = dma_resv_lock_interruptible(pw->batch->resv, NULL); - if (err) - goto err_commit; - err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) - goto err_commit_unlock; + goto err_commit; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) 
- goto err_commit_unlock; + goto err_commit; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - dma_resv_unlock(pw->batch->resv); - /* Force execution to wait for completion of the parser */ - dma_resv_lock(shadow->resv, NULL); dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - dma_resv_unlock(shadow->resv); dma_fence_work_commit_imm(&pw->base); return 0; -err_commit_unlock: - dma_resv_unlock(pw->batch->resv); err_commit: i915_sw_fence_set_error_once(&pw->base.chain, err); dma_fence_work_commit_imm(&pw->base); @@ -2109,16 +2407,33 @@ err_free: return err; } +static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) +{ + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. + * hsw should have this fixed, but bdw mucks it up again. */ + if (eb->batch_flags & I915_DISPATCH_SECURE) + return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0); + + return NULL; +} + static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *shadow, *trampoline; + struct intel_gt_buffer_pool_node *pool = eb->batch_pool; + struct i915_vma *shadow, *trampoline, *batch; unsigned int len; int err; - if (!eb_use_cmdparser(eb)) - return 0; + if (!eb_use_cmdparser(eb)) { + batch = eb_dispatch_secure(eb, eb->batch->vma); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + goto secure_batch; + } len = eb->batch_len; if (!CMDPARSER_USES_GGTT(eb->i915)) { @@ -2135,11 +2450,18 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + eb->batch_pool = pool; + } + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err; - shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); if (IS_ERR(shadow)) { err = PTR_ERR(shadow); goto err; @@ -2151,7 +2473,7 @@ static int eb_parse(struct i915_execbuffer *eb) if (CMDPARSER_USES_GGTT(eb->i915)) { trampoline = shadow; - shadow = shadow_batch_pin(pool->obj, + shadow = shadow_batch_pin(eb, pool->obj, &eb->engine->gt->ggtt->vm, PIN_GLOBAL); if (IS_ERR(shadow)) { @@ -2164,42 +2486,43 @@ static int eb_parse(struct i915_execbuffer *eb) eb->batch_flags |= I915_DISPATCH_SECURE; } + batch = eb_dispatch_secure(eb, shadow); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_trampoline; + } + err = eb_parse_pipeline(eb, shadow, trampoline); if (err) - goto err_trampoline; + goto err_unpin_batch; - eb->vma[eb->buffer_count].vma = i915_vma_get(shadow); - eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN; eb->batch = &eb->vma[eb->buffer_count++]; - eb->vma[eb->buffer_count].vma = NULL; + eb->batch->vma = i915_vma_get(shadow); + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; eb->trampoline = trampoline; eb->batch_start_offset = 0; +secure_batch: + if (batch) { + eb->batch = &eb->vma[eb->buffer_count++]; + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; + eb->batch->vma = i915_vma_get(batch); + } return 0; +err_unpin_batch: + if (batch) + i915_vma_unpin(batch); err_trampoline: if (trampoline) i915_vma_unpin(trampoline); err_shadow: i915_vma_unpin(shadow); err: - 
intel_gt_buffer_pool_put(pool); return err; } -static void -add_to_client(struct i915_request *rq, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - rq->file_priv = file_priv; - - spin_lock(&file_priv->mm.lock); - list_add_tail(&rq->client_link, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); -} - static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) { int err; @@ -2281,7 +2604,7 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct i915_request *eb_throttle(struct intel_context *ce) +static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_ring *ring = ce->ring; struct intel_timeline *tl = ce->timeline; @@ -2315,31 +2638,26 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) { + struct intel_context *ce = eb->context; struct intel_timeline *tl; - struct i915_request *rq; + struct i915_request *rq = NULL; int err; - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - err = intel_gt_terminally_wedged(ce->engine->gt); - if (err) - return err; + GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); if (unlikely(intel_context_is_banned(ce))) - return -EIO; + return ERR_PTR(-EIO); /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = intel_context_pin(ce); + err = intel_context_pin_ww(ce, &eb->ww); if (err) - return err; + return ERR_PTR(err); /* * Take a local wakeref for preparing to dispatch the execbuf as @@ -2351,45 +2669,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) */ tl = intel_context_timeline_lock(ce); if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto err_unpin; + intel_context_unpin(ce); + return ERR_CAST(tl); } intel_context_enter(ce); - rq = eb_throttle(ce); - + if (throttle) + rq = eb_throttle(eb, ce); intel_context_timeline_unlock(tl); - if (rq) { - bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - long timeout; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (nonblock) - timeout = 0; - - timeout = i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - timeout); - i915_request_put(rq); - - if (timeout < 0) { - err = nonblock ? 
-EWOULDBLOCK : timeout; - goto err_exit; - } - } - - eb->engine = ce->engine; - eb->context = ce; - return 0; - -err_exit: - mutex_lock(&tl->mutex); - intel_context_exit(ce); - intel_context_timeline_unlock(tl); -err_unpin: - intel_context_unpin(ce); - return err; + eb->args->flags |= __EXEC_ENGINE_PINNED; + return rq; } static void eb_unpin_engine(struct i915_execbuffer *eb) @@ -2397,6 +2687,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) struct intel_context *ce = eb->context; struct intel_timeline *tl = ce->timeline; + if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) + return; + + eb->args->flags &= ~__EXEC_ENGINE_PINNED; + mutex_lock(&tl->mutex); intel_context_exit(ce); mutex_unlock(&tl->mutex); @@ -2405,11 +2700,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) } static unsigned int -eb_select_legacy_ring(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_legacy_ring(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; + struct drm_i915_gem_execbuffer2 *args = eb->args; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; if (user_ring_id != I915_EXEC_BSD && @@ -2424,7 +2718,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(i915, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2449,131 +2743,297 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_pin_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_engine(struct i915_execbuffer *eb) { struct intel_context *ce; unsigned int idx; int err; if (i915_gem_context_user_engines(eb->gem_context)) - idx = args->flags & I915_EXEC_RING_MASK; + idx = eb->args->flags & I915_EXEC_RING_MASK; else - idx = eb_select_legacy_ring(eb, file, args); + idx = eb_select_legacy_ring(eb); ce = i915_gem_context_get_engine(eb->gem_context, idx); if (IS_ERR(ce)) return PTR_ERR(ce); - err = __eb_pin_engine(eb, ce); - intel_context_put(ce); + intel_gt_pm_get(ce->engine->gt); + + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = intel_context_alloc_state(ce); + if (err) + goto err; + } + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = intel_gt_terminally_wedged(ce->engine->gt); + if (err) + goto err; + + eb->context = ce; + eb->engine = ce->engine; + /* + * Make sure engine pool stays alive even if we call intel_context_put + * during ww handling. The pool is destroyed when last pm reference + * is dropped, which breaks our -EDEADLK handling. 
+ */ return err; + +err: + intel_gt_pm_put(ce->engine->gt); + intel_context_put(ce); + return err; +} + +static void +eb_put_engine(struct i915_execbuffer *eb) +{ + intel_gt_pm_put(eb->engine->gt); + intel_context_put(eb->context); } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +__free_fence_array(struct eb_fence *fences, unsigned int n) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static int +add_timeline_fence_array(struct i915_execbuffer *eb, + const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) { - const unsigned long nfences = args->num_cliprects; - struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; - unsigned long n; - int err; + struct drm_i915_gem_exec_fence __user *user_fences; + u64 __user *user_values; + struct eb_fence *f; + u64 nfences; + int err = 0; - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; + nfences = timeline_fences->fence_count; + if (!nfences) + return 0; /* Check multiplication overflow for access_ok() and kvmalloc_array() */ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) - return ERR_PTR(-EINVAL); + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*f)) - eb->num_fences) + return -EINVAL; - user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) - return ERR_PTR(-EFAULT); + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, nfences * sizeof(*user_fences))) + return -EFAULT; - fences = kvmalloc_array(nfences, sizeof(*fences), - __GFP_NOWARN | GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, nfences * sizeof(*user_values))) + return -EFAULT; + + f = krealloc(eb->fences, + (eb->num_fences + nfences) * sizeof(*f), + __GFP_NOWARN | GFP_KERNEL); + if (!f) + return -ENOMEM; - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; + eb->fences = f; + f += eb->num_fences; + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + while (nfences--) { + struct drm_i915_gem_exec_fence user_fence; struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; - if (__copy_from_user(&fence, user++, sizeof(fence))) { - err = -EFAULT; - goto err; + if (__copy_from_user(&user_fence, + user_fences++, + sizeof(user_fence))) + return -EFAULT; + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) + return -EINVAL; + + if (__get_user(point, user_values++)) + return -EFAULT; + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + return -ENOENT; } - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { - err = -EINVAL; - goto err; + fence = drm_syncobj_fence_get(syncobj); + + if (!fence && user_fence.flags && + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + return -EINVAL; } - syncobj = drm_syncobj_find(file, fence.handle); + if (fence) + err = dma_fence_chain_find_seqno(&fence, point); + + if (err && 
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + dma_fence_put(fence); + drm_syncobj_put(syncobj); + return err; + } + + /* + * A point might have been signaled already and + * garbage collected from the timeline. In this case + * just ignore the point and carry on. + */ + if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + drm_syncobj_put(syncobj); + continue; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. + */ + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); + dma_fence_put(fence); + drm_syncobj_put(syncobj); + return -EINVAL; + } + + f->chain_fence = + kmalloc(sizeof(*f->chain_fence), + GFP_KERNEL); + if (!f->chain_fence) { + drm_syncobj_put(syncobj); + dma_fence_put(fence); + return -ENOMEM; + } + } else { + f->chain_fence = NULL; + } + + f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + f->dma_fence = fence; + f->value = point; + f++; + eb->num_fences++; + } + + return 0; +} + +static int add_fence_array(struct i915_execbuffer *eb) +{ + struct drm_i915_gem_execbuffer2 *args = eb->args; + struct drm_i915_gem_exec_fence __user *user; + unsigned long num_fences = args->num_cliprects; + struct eb_fence *f; + + if (!(args->flags & I915_EXEC_FENCE_ARRAY)) + return 0; + + if (!num_fences) + return 0; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*f) - eb->num_fences)) + return -EINVAL; + + user = u64_to_user_ptr(args->cliprects_ptr); + if (!access_ok(user, num_fences * sizeof(*user))) + return -EFAULT; + + f = krealloc(eb->fences, + (eb->num_fences + num_fences) * sizeof(*f), + __GFP_NOWARN | GFP_KERNEL); + if (!f) + return -ENOMEM; + + eb->fences = f; + f += eb->num_fences; + while (num_fences--) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) + return -EFAULT; + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) + return -EINVAL; + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); - err = -ENOENT; - goto err; + return -ENOENT; + } + + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + return -EINVAL; + } } BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + f->dma_fence = fence; + f->value = 0; + f->chain_fence = NULL; + f++; + eb->num_fences++; } - return fences; - -err: - __free_fence_array(fences, n); - return ERR_PTR(err); + return 0; } -static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) +static void put_fence_array(struct eb_fence *fences, int num_fences) { if (fences) - __free_fence_array(fences, args->num_cliprects); + __free_fence_array(fences, num_fences); } static int -await_fence_array(struct i915_execbuffer *eb, - struct 
drm_syncobj **fences) +await_fence_array(struct i915_execbuffer *eb) { - const unsigned int nfences = eb->args->num_cliprects; unsigned int n; int err; - for (n = 0; n < nfences; n++) { + for (n = 0; n < eb->num_fences; n++) { struct drm_syncobj *syncobj; - struct dma_fence *fence; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_WAIT)) - continue; + syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; + if (!eb->fences[n].dma_fence) + continue; - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); + err = i915_request_await_dma_fence(eb->request, + eb->fences[n].dma_fence); if (err < 0) return err; } @@ -2581,26 +3041,47 @@ await_fence_array(struct i915_execbuffer *eb, return 0; } -static void -signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) +static void signal_fence_array(const struct i915_execbuffer *eb) { - const unsigned int nfences = eb->args->num_cliprects; struct dma_fence * const fence = &eb->request->fence; unsigned int n; - for (n = 0; n < nfences; n++) { + for (n = 0; n < eb->num_fences; n++) { struct drm_syncobj *syncobj; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (eb->fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, + eb->fences[n].chain_fence, + fence, + eb->fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + eb->fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } +static int +parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + + if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) + return -EFAULT; + + return add_timeline_fence_array(eb, &timeline_fences); +} + static void retire_requests(struct intel_timeline *tl, struct i915_request *end) { struct i915_request *rq, *rn; @@ -2642,12 +3123,37 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, +}; + +static int +parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) +{ + if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) + return 0; + + /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot + * have another flag also using it at the same time. 
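For context, a userspace caller reaches add_timeline_fence_array() above by chaining the new extension through cliprects_ptr. A rough sketch of that setup; only fence_count, handles_ptr and values_ptr appear in this diff, while the i915_user_extension 'base' field and 'syncobj_handle' are assumed from the standard i915 uapi:

	/* Hypothetical userspace fragment: signal timeline point 1 of one syncobj. */
	struct drm_i915_gem_exec_fence fence = {
		.handle = syncobj_handle,               /* from DRM_IOCTL_SYNCOBJ_CREATE */
		.flags  = I915_EXEC_FENCE_SIGNAL,
	};
	__u64 point = 1;
	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
		.base.name   = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
		.fence_count = 1,
		.handles_ptr = (__u64)(uintptr_t)&fence,
		.values_ptr  = (__u64)(uintptr_t)&point,
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		/* buffers_ptr / buffer_count / batch_len set up as usual */
		.flags         = I915_EXEC_USE_EXTENSIONS,
		.cliprects_ptr = (__u64)(uintptr_t)&ext,
		.num_cliprects = 0,     /* must be zero when extensions are used */
	};

The kernel side rejects combining this with I915_EXEC_FENCE_ARRAY, and rejects waiting and signalling the same non-zero point, as the hunks above and below show.
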
+ */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (args->num_cliprects != 0) + return -EINVAL; + + return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), + execbuf_extensions, + ARRAY_SIZE(execbuf_extensions), + eb); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) + struct drm_i915_gem_exec_object2 *exec) { struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; @@ -2668,6 +3174,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; + eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); + eb.vma[0].vma = NULL; + eb.reloc_pool = eb.batch_pool = NULL; + eb.reloc_context = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2677,6 +3187,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_len = args->batch_len; eb.trampoline = NULL; + eb.fences = NULL; + eb.num_fences = 0; + eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (INTEL_GEN(i915) >= 11) @@ -2694,14 +3207,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; + err = parse_execbuf2_extensions(args, &eb); + if (err) + goto err_ext; + + err = add_fence_array(&eb); + if (err) + goto err_ext; + #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) if (args->flags & IN_FENCES) { if ((args->flags & IN_FENCES) == IN_FENCES) return -EINVAL; in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; + if (!in_fence) { + err = -EINVAL; + goto err_ext; + } } #undef IN_FENCES @@ -2723,11 +3246,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - err = eb_pin_engine(&eb, file, args); + err = eb_select_engine(&eb); if (unlikely(err)) goto err_context; - err = eb_relocate(&eb); + err = eb_lookup_vmas(&eb); + if (err) { + eb_release_vmas(&eb, true); + goto err_engine; + } + + i915_gem_ww_ctx_init(&eb.ww, true); + + err = eb_relocate_parse(&eb); if (err) { /* * If the user expects the execobject.offset and @@ -2740,54 +3271,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) { - drm_dbg(&i915->drm, - "Attempting to use self-modifying batch buffer\n"); - err = -EINVAL; - goto err_vma; - } - - if (range_overflows_t(u64, - eb.batch_start_offset, eb.batch_len, - eb.batch->vma->size)) { - drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); - err = -EINVAL; - goto err_vma; - } - - if (eb.batch_len == 0) - eb.batch_len = eb.batch->vma->size - eb.batch_start_offset; - - err = eb_parse(&eb); - if (err) - goto err_vma; + ww_acquire_done(&eb.ww.ctx); - /* - * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure - * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but bdw mucks it up again. */ batch = eb.batch->vma; - if (eb.batch_flags & I915_DISPATCH_SECURE) { - struct i915_vma *vma; - - /* - * So on first glance it looks freaky that we pin the batch here - * outside of the reservation loop. But: - * - The batch is already pinned into the relevant ppgtt, so we - * already have the backing storage fully allocated. 
- * - No other BO uses the global gtt (well contexts, but meh), - * so we don't really have issues with multiple objects not - * fitting due to fragmentation. - * So this is actually safe. - */ - vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_parse; - } - - batch = vma; - } /* All GPU relocation batches must be submitted prior to the user rq */ GEM_BUG_ON(eb.reloc_cache.rq); @@ -2796,7 +3282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); - goto err_batch_unpin; + goto err_vma; } if (in_fence) { @@ -2811,8 +3297,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_request; } - if (fences) { - err = await_fence_array(&eb, fences); + if (eb.fences) { + err = await_fence_array(&eb); if (err) goto err_request; } @@ -2833,18 +3319,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = batch; - if (batch->private) - intel_gt_buffer_pool_mark_active(batch->private, eb.request); + if (eb.batch_pool) + intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); err_request: - add_to_client(eb.request, file); i915_request_get(eb.request); eb_request_add(&eb); - if (fences) - signal_fence_array(&eb, fences); + if (eb.fences) + signal_fence_array(&eb); if (out_fence) { if (err == 0) { @@ -2858,16 +3343,21 @@ err_request: } i915_request_put(eb.request); -err_batch_unpin: - if (eb.batch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(batch); -err_parse: - if (batch->private) - intel_gt_buffer_pool_put(batch->private); err_vma: + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); - eb_unpin_engine(&eb); + WARN_ON(err == -EDEADLK); + i915_gem_ww_ctx_fini(&eb.ww); + + if (eb.batch_pool) + intel_gt_buffer_pool_put(eb.batch_pool); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); +err_engine: + eb_put_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); err_destroy: @@ -2877,12 +3367,14 @@ err_out_fence: put_unused_fd(out_fence_fd); err_in_fence: dma_fence_put(in_fence); +err_ext: + put_fence_array(eb.fences, eb.num_fences); return err; } static size_t eb_element_size(void) { - return sizeof(struct drm_i915_gem_exec_object2); + return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); } static bool check_buffer_count(size_t count) @@ -2938,7 +3430,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count, eb_element_size(), + + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { drm_dbg(&i915->drm, @@ -2971,7 +3465,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -3003,7 +3497,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct 
drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; const size_t count = args->buffer_count; int err; @@ -3016,7 +3509,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, if (err) return err; - exec2_list = kvmalloc_array(count, eb_element_size(), + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", @@ -3031,15 +3525,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -3080,7 +3566,6 @@ end:; } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); kvfree(exec2_list); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 753f82d87a31..3d69e51f3e4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct intel_runtime_pm *rpm = &i915->runtime_pm; struct i915_ggtt *ggtt = &i915->ggtt; bool write = area->vm_flags & VM_WRITE; + struct i915_gem_ww_ctx ww; intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; int srcu; int ret; - /* Sanity check that we allow writing into this object */ - if (i915_gem_object_is_readonly(obj) && write) - return VM_FAULT_SIGBUS; - /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; trace_i915_gem_object_fault(obj, page_offset, true, write); - ret = i915_gem_object_pin_pages(obj); + wakeref = intel_runtime_pm_get(rpm); + + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); if (ret) - goto err; + goto err_rpm; - wakeref = intel_runtime_pm_get(rpm); + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) { + ret = -EFAULT; + goto err_rpm; + } - ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + ret = i915_gem_object_pin_pages(obj); if (ret) goto err_rpm; + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + if (ret) + goto err_pages; + /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); @@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) * all hope that the hardware is able to track future writes. 
*/ - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } /* The entire mappable GGTT is pinned? Unexpected! */ @@ -389,10 +398,16 @@ err_unpin: __i915_vma_unpin(vma); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); +err_pages: + i915_gem_object_unpin_pages(obj); err_rpm: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); intel_runtime_pm_put(rpm, wakeref); - i915_gem_object_unpin_pages(obj); -err: return i915_error_to_vmf_fault(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 9cf4ad78ece6..d46db8d8f38e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + bool intr) { - dma_resv_lock(obj->base.resv, NULL); + int ret; + + if (intr) + ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL); + else + ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL); + + if (!ret && ww) + list_add_tail(&obj->obj_link, &ww->obj_list); + if (ret == -EALREADY) + ret = 0; + + if (ret == -EDEADLK) + ww->contended = obj; + + return ret; } -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_trylock(obj->base.resv); + return __i915_gem_object_lock(obj, ww, ww && ww->intr); } -static inline int -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_lock_interruptible(obj->base.resv, NULL); + WARN_ON(ww && !ww->intr); + return __i915_gem_object_lock(obj, ww, true); +} + +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) @@ -412,7 +436,6 @@ static inline void i915_gem_object_finish_access(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_pages(obj); - i915_gem_object_unlock(obj); } static inline struct intel_engine_cs * @@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index bfdb32d46877..aee7ad3cc3c6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -14,6 +14,7 @@ 
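The fault handler above and the blt/selftest conversions that follow all share the same transaction shape built from the helpers added to i915_gem_object.h. A minimal sketch of that shape, using only calls visible in these hunks; error handling is trimmed and 'obj'/'vma' are placeholders:

	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);        /* true: interruptible waits */
retry:
	err = i915_gem_object_lock(obj, &ww);   /* -EALREADY is folded to 0 */
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (!err) {
		/* ... work that needs the object reservation held ... */
		i915_vma_unpin(vma);
	}
	if (err == -EDEADLK) {
		/* back off: drop the locks taken so far, wait on the contended object */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);              /* ends the transaction, unlocking tracked objects */

The same -EDEADLK/backoff/retry loop appears in vm_fault_gtt() above and in i915_gem_object_fill_blt() and i915_gem_object_copy_blt() below.
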
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value) { struct drm_i915_private *i915 = ce->vm->i915; @@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, goto out_pm; } + err = i915_gem_object_lock(pool->obj, ww); + if (err) + goto out_put; + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_put; + goto out_unpin; } rem = vma->size; @@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, intel_gt_chipset_flush(ce->vm->gt); - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - batch->private = pool; return batch; +out_unpin: + i915_vma_unpin(batch); out_put: intel_gt_buffer_pool_put(pool); out_pm: @@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) { int err; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); if (unlikely(err)) return err; @@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { + struct i915_gem_ww_ctx ww; struct i915_request *rq; struct i915_vma *batch; struct i915_vma *vma; @@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (unlikely(err)) - return err; + i915_gem_ww_ctx_init(&ww, true); + intel_engine_pm_get(ce->engine); +retry: + err = i915_gem_object_lock(obj, &ww); + if (err) + goto out; - batch = intel_emit_vma_fill_blt(ce, vma, value); + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (err) + goto out_ctx; + + batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto out_unpin; + goto out_vma; } - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (unlikely(err)) goto out_request; - i915_vma_lock(vma); err = move_obj_to_gpu(vma->obj, rq, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); if (unlikely(err)) goto out_request; @@ -193,8 +208,18 @@ out_request: i915_request_add(rq); out_batch: intel_emit_vma_release(ce, batch); -out_unpin: +out_vma: i915_vma_unpin(vma); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); return err; } @@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) } struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, struct i915_vma *src, struct i915_vma *dst) { @@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, goto out_pm; } + err = i915_gem_object_lock(pool->obj, ww); + 
if (err) + goto out_put; + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_put; + goto out_unpin; } rem = src->size; @@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, i915_gem_object_unpin_map(pool->obj); intel_gt_chipset_flush(ce->vm->gt); - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - batch->private = pool; return batch; +out_unpin: + i915_vma_unpin(batch); out_put: intel_gt_buffer_pool_put(pool); out_pm: @@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, struct drm_i915_gem_object *dst, struct intel_context *ce) { - struct drm_gem_object *objs[] = { &src->base, &dst->base }; struct i915_address_space *vm = ce->vm; struct i915_vma *vma[2], *batch; - struct ww_acquire_ctx acquire; + struct i915_gem_ww_ctx ww; struct i915_request *rq; int err, i; @@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, if (IS_ERR(vma[0])) return PTR_ERR(vma[0]); - err = i915_vma_pin(vma[0], 0, 0, PIN_USER); - if (unlikely(err)) - return err; - vma[1] = i915_vma_instance(dst, vm, NULL); if (IS_ERR(vma[1])) - goto out_unpin_src; + return PTR_ERR(vma[1]); - err = i915_vma_pin(vma[1], 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, true); + intel_engine_pm_get(ce->engine); +retry: + err = i915_gem_object_lock(src, &ww); + if (!err) + err = i915_gem_object_lock(dst, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); + if (err) + goto out_ctx; + + err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); if (unlikely(err)) goto out_unpin_src; - batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_unpin_dst; } - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, if (unlikely(err)) goto out_request; - err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); - if (unlikely(err)) - goto out_request; - for (i = 0; i < ARRAY_SIZE(vma); i++) { err = move_obj_to_gpu(vma[i]->obj, rq, i); if (unlikely(err)) - goto out_unlock; + goto out_request; } for (i = 0; i < ARRAY_SIZE(vma); i++) { @@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, err = i915_vma_move_to_active(vma[i], rq, flags); if (unlikely(err)) - goto out_unlock; + goto out_request; } if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); if (unlikely(err)) - goto out_unlock; + goto out_request; } err = rq->engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0); -out_unlock: - drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); + out_request: if (unlikely(err)) i915_request_set_error_once(rq, err); @@ -400,6 +436,16 @@ out_unpin_dst: i915_vma_unpin(vma[1]); out_unpin_src: i915_vma_unpin(vma[0]); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) 
+ goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 8bcd336a90dc..2409fdcccf0e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -13,12 +13,15 @@ #include "i915_vma.h" struct drm_i915_gem_object; +struct i915_gem_ww_ctx; struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value); struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, struct i915_vma *src, struct i915_vma *dst); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5335f799b548..b5c15557cc87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -123,6 +123,15 @@ struct drm_i915_gem_object { struct list_head lut_list; spinlock_t lut_lock; /* guards lut_list */ + /** + * @obj_link: Link into @i915_gem_ww_ctx.obj_list + * + * When we lock this object through i915_gem_object_lock() with a + * context, we add it to the list to ensure we can unlock everything + * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called. + */ + struct list_head obj_link; + /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; union { @@ -282,6 +291,7 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + u64 encode; void *gvt_info; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index e8a083743e09..d6eeefab3d01 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -254,9 +254,35 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC) return NULL; + if (GEM_WARN_ON(type == I915_MAP_WC && + !static_cpu_has(X86_FEATURE_PAT))) + return NULL; + /* A single page can always be kmapped */ - if (n_pte == 1 && type == I915_MAP_WB) - return kmap(sg_page(sgt->sgl)); + if (n_pte == 1 && type == I915_MAP_WB) { + struct page *page = sg_page(sgt->sgl); + + /* + * On 32b, highmem using a finite set of indirect PTE (i.e. + * vmap) to provide virtual mappings of the high pages. + * As these are finite, map_new_virtual() must wait for some + * other kmap() to finish when it runs out. If we map a large + * number of objects, there is no method for it to tell us + * to release the mappings, and we deadlock. + * + * However, if we make an explicit vmap of the page, that + * uses a larger vmalloc arena, and also has the ability + * to tell us to release unwanted mappings. Most importantly, + * it will fail and propagate an error instead of waiting + * forever. + * + * So if the page is beyond the 32b boundary, make an explicit + * vmap. On 64b, this check will be optimised away as we can + * directly kmap any page on the system. 
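The @obj_link kernel-doc above describes the bookkeeping that makes the backoff path work: every object locked against a ww context is queued on ww->obj_list. The actual unlock-all helper lives in i915_gem.c and is not part of this hunk; conceptually it amounts to the following rough sketch, not the real function body:

	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next, &ww->obj_list, obj_link) {
		list_del(&obj->obj_link);
		i915_gem_object_unlock(obj);
	}
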
+ */ + if (!PageHighMem(page)) + return kmap(page); + } mem = stack; if (n_pte > ARRAY_SIZE(stack)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3d215164dd5a..40d3e40500fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); drm_WARN_ON(&i915->drm, i915_gem_object_set_to_gtt_domain(obj, false)); i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 540ef0551789..1929d6cf4150 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -9,6 +9,7 @@ #include <drm/drm_file.h> #include "i915_drv.h" +#include "i915_gem_context.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -35,9 +36,10 @@ int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct i915_request *request, *target = NULL; + struct i915_gem_context *ctx; + unsigned long idx; long ret; /* ABI: return -EIO if already wedged */ @@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_link) { - if (time_after_eq(request->emitted_jiffies, recent_enough)) - break; + rcu_read_lock(); + xa_for_each(&file_priv->context_xa, idx, ctx) { + struct i915_gem_engines_iter it; + struct intel_context *ce; - if (target && xchg(&target->file_priv, NULL)) - list_del(&target->client_link); + if (!kref_get_unless_zero(&ctx->ref)) + continue; + rcu_read_unlock(); - target = request; - } - if (target) - i915_request_get(target); - spin_unlock(&file_priv->mm.lock); + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), + it) { + struct i915_request *rq, *target = NULL; + + if (!ce->timeline) + continue; + + mutex_lock(&ce->timeline->mutex); + list_for_each_entry_reverse(rq, + &ce->timeline->requests, + link) { + if (i915_request_completed(rq)) + break; - if (!target) - return 0; + if (time_after(rq->emitted_jiffies, + recent_enough)) + continue; - ret = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - i915_request_put(target); + target = i915_request_get(rq); + break; + } + mutex_unlock(&ce->timeline->mutex); + if (!target) + continue; + + ret = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(target); + if (ret < 0) + break; + } + i915_gem_context_unlock_engines(ctx); + i915_gem_context_put(ctx); + + rcu_read_lock(); + } + rcu_read_unlock(); return ret < 0 ? ret : 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ff72ee2fd9cd..ffcaee74a249 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. 
*/ - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8291ede6902c..1f35e71429b4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) */ for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { - unsigned int combination = 0; + unsigned int combination = SZ_4K; /* Required for ppGTT */ for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { if (i & BIT(j)) @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce, { int err; - i915_gem_object_lock(vma->obj); + i915_gem_object_lock(vma->obj, NULL); err = i915_gem_object_set_to_gtt_domain(vma->obj, true); i915_gem_object_unlock(vma->obj); if (err) @@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) unsigned long n; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto err_unlock; for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); @@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) } i915_gem_object_finish_access(obj); +err_unlock: + i915_gem_object_unlock(obj); return err; } @@ -1614,7 +1617,7 @@ int i915_gem_huge_page_mock_selftests(void) out_put: i915_vm_put(&ppgtt->vm); out_unlock: - drm_dev_put(&dev_priv->drm); + mock_destroy_device(dev_priv); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 299c29e9ad86..4e36d4897ea6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) if (err) goto err_unpin; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 87d7d8aa080f..7049a6bbc03d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) @@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -73,7 +77,9 @@ static int cpu_get(struct context 
*ctx, unsigned long offset, u32 *v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int gtt_set(struct context *ctx, unsigned long offset, u32 v) @@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); if (err) @@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); if (err) @@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cs; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - i915_gem_object_unlock(ctx->obj); if (err) - return err; + goto out_unlock; vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unlock; + } rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto out_unpin; } cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_request_add(rq); - i915_vma_unpin(vma); - return PTR_ERR(cs); + err = PTR_ERR(cs); + goto out_rq; } if (INTEL_GEN(ctx->engine->i915) >= 8) { @@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) } intel_ring_advance(rq, cs); - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - i915_vma_unpin(vma); +out_rq: i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); +out_unlock: + i915_gem_object_unlock(ctx->obj); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7ffc3c751432..d3f87dc4eda3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) unsigned int n, m, need_flush; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_write(obj, &need_flush); if (err) - return err; + goto out; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) i915_gem_object_finish_access(obj); obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; obj->write_domain = 0; - return 0; +out: + i915_gem_object_unlock(obj); + return err; } static noinline int 
cpu_check(struct drm_i915_gem_object *obj, @@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, unsigned int n, m, needs_flush; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto out_unlock; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -527,6 +531,8 @@ out_unmap: } i915_gem_object_finish_access(obj); +out_unlock: + i915_gem_object_unlock(obj); return err; } @@ -887,24 +893,15 @@ out_file: return err; } -static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) { - struct drm_i915_gem_object *obj; u32 *cmd; - int err; - if (INTEL_GEN(vma->vm->i915) < 8) - return ERR_PTR(-EINVAL); + GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8); - obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } + cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); *cmd++ = MI_STORE_REGISTER_MEM_GEN8; *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); @@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) *cmd++ = upper_32_bits(vma->node.start); *cmd = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); + __i915_gem_object_flush_map(rpcs, 0, 64); + i915_gem_object_unpin_map(rpcs); intel_gt_chipset_flush(vma->vm->gt); - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); + return 0; } static int @@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, struct intel_context *ce, struct i915_request **rq_out) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; + struct i915_gem_ww_ctx ww; struct i915_vma *batch; struct i915_vma *vma; + struct drm_i915_gem_object *rpcs; int err; GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + if (INTEL_GEN(i915) < 8) + return -EINVAL; + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(rpcs)) + return PTR_ERR(rpcs); - batch = rpcs_query_batch(vma); + batch = i915_vma_instance(rpcs, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto err_vma; + goto err_put; } + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_lock(rpcs, &ww); + if (!err) + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (err) + goto err_put; + + err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); + if (err) + goto err_vma; + + err = rpcs_query_batch(rpcs, vma); + if (err) + goto err_batch; + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); 
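The emit_rpcs_query() conversion here shows the ww-mutex pattern this series rolls out across GEM: take every object lock under one i915_gem_ww_ctx, and on -EDEADLK back off and retry the whole sequence. A minimal sketch of that pattern (assuming the i915-internal headers), reduced to two objects, with the hypothetical do_work_locked() standing in for the real request building:

static int example_two_object_op(struct drm_i915_gem_object *a,
				 struct drm_i915_gem_object *b)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);	/* second argument as in the selftest conversions above */
retry:
	err = i915_gem_object_lock(a, &ww);
	if (!err)
		err = i915_gem_object_lock(b, &ww);
	if (!err)
		err = do_work_locked(a, b);	/* hypothetical: the work done under both locks */

	if (err == -EDEADLK) {
		/* drop the locks taken under @ww, then retry from the top */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}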
if (err) goto skip_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); if (err) goto skip_request; @@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin_and_release(&batch, 0); - i915_vma_unpin(vma); - *rq_out = i915_request_get(rq); - i915_request_add(rq); - - return 0; - skip_request: - i915_request_set_error_once(rq, err); + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); err_batch: - i915_vma_unpin_and_release(&batch, 0); + i915_vma_unpin(batch); err_vma: i915_vma_unpin(vma); - +err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + i915_gem_object_put(rpcs); return err; } @@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) @@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) if (!vm) return -ENODEV; - page = vm->scratch[0].base.page; + page = __px_page(vm->scratch[0]); if (!page) { pr_err("No scratch page!\n"); return -EINVAL; @@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg) return -ENOMEM; counter = 0; - err = context_barrier_task(ctx, 0, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, 0, NULL, NULL, NULL, + mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg) } counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg) counter = 0; context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, + mock_barrier_task, &counter); context_barrier_inject_fault = 0; if (err == -ENXIO) err = 0; @@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg) goto out; counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -2003,7 +1997,7 @@ int i915_gem_context_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 2a52b92586b9..0845ce1ae37c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -272,7 +272,7 @@ int i915_gem_dmabuf_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index a49016f8ee0d..e1d50a5a1477 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + err = i915_gem_object_lock(obj, &eb->ww); + if (err) + return err; + + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); if (err) return err; /* 8-Byte aligned */ - if (!__reloc_entry_gpu(eb, vma, - offsets[0] * sizeof(u32), - 0)) { - err = -EIO; - goto unpin_vma; - } + err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); + if (err <= 0) + goto reloc_err; /* !8-Byte aligned */ - if (!__reloc_entry_gpu(eb, vma, - offsets[1] * sizeof(u32), - 1)) { - err = -EIO; - goto unpin_vma; - } + err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); + if (err <= 0) + goto reloc_err; /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; + i = PAGE_SIZE / sizeof(u32) - 1; i -= eb->reloc_cache.rq_size; memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, MI_NOOP, i); eb->reloc_cache.rq_size += i; - /* Force batch chaining */ - if (!__reloc_entry_gpu(eb, vma, - offsets[2] * sizeof(u32), - 2)) { - err = -EIO; - goto unpin_vma; - } + /* Force next batch */ + err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); + if (err <= 0) + goto reloc_err; GEM_BUG_ON(!eb->reloc_cache.rq); rq = i915_request_get(eb->reloc_cache.rq); - err = reloc_gpu_flush(&eb->reloc_cache); - if (err) - goto put_rq; + reloc_gpu_flush(eb, &eb->reloc_cache); GEM_BUG_ON(eb->reloc_cache.rq); err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); @@ -103,6 +96,11 @@ put_rq: unpin_vma: i915_vma_unpin(vma); return err; + +reloc_err: + if (!err) + err = -EIO; + goto unpin_vma; } static int igt_gpu_reloc(void *arg) @@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg) goto err_scratch; } + intel_gt_pm_get(&eb.i915->gt); + for_each_uabi_engine(eb.engine, eb.i915) { reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); @@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg) err = PTR_ERR(eb.context); goto err_pm; } + eb.reloc_pool = NULL; + eb.reloc_context = NULL; - err = intel_context_pin(eb.context); - if (err) - goto err_put; + i915_gem_ww_ctx_init(&eb.ww, false); +retry: + err = intel_context_pin_ww(eb.context, &eb.ww); + if (!err) { + err = __igt_gpu_reloc(&eb, scratch); + + intel_context_unpin(eb.context); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&eb.ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&eb.ww); - err = __igt_gpu_reloc(&eb, scratch); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); - intel_context_unpin(eb.context); -err_put: intel_context_put(eb.context); err_pm: intel_engine_pm_put(eb.engine); @@ -153,6 +167,7 @@ err_pm: if (igt_flush_test(eb.i915)) err = -EIO; + intel_gt_pm_put(&eb.i915->gt); err_scratch: i915_gem_object_put(scratch); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c7402ce5bf9..d27d87a678c8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, 
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; int err; vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - return err; + goto err; rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto err_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); i915_request_add(rq); +err_unpin: i915_vma_unpin(vma); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) return err; } @@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); if (IS_ERR(vma)) { @@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unmap; } - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - goto out_unmap; + goto out_ww; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); err = engine->emit_bb_start(rq, vma->node.start, 0, 0); i915_request_get(rq); @@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, out_unpin: i915_vma_unpin(vma); +out_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) goto out_unmap; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index faa5b6d91795..bf853c40ec65 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -85,7 +85,7 @@ int i915_gem_object_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 
34932871b3a5..8cee68c6a6dc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
 	}
 
 	/* Make the object dirty so that put_pages must do copy back the data */
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
 	i915_gem_object_unlock(obj);
 	if (err) {
@@ -73,6 +73,6 @@ int i915_gem_phys_mock_selftests(void)
 
 	err = i915_subtests(tests, i915);
 
-	drm_dev_put(&i915->drm);
+	mock_destroy_device(i915);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index debaf7b18ab5..be30b27e2926 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -28,10 +28,9 @@ static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
 		sg = sg_next(sg);
 	}
 
-	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
-		err = -ENOMEM;
+	err = dma_map_sgtable(attachment->dev, st, dir, 0);
+	if (err)
 		goto err_st;
-	}
 
 	return st;
 
@@ -46,7 +45,7 @@ static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment,
 			       struct sg_table *st,
 			       enum dma_data_direction dir)
 {
-	dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir);
+	dma_unmap_sgtable(attachment->dev, st, dir, 0);
 	sg_free_table(st);
 	kfree(st);
 }
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index cdc0b9c54305..fd0d24d28763 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
 				  const unsigned int pde,
 				  const struct i915_page_table *pt)
 {
+	dma_addr_t addr = pt ?
px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); + /* Caller needs to make sure the write completes if necessary */ - iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID, ppgtt->pd_addr + pde); } @@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, { struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch[0].encode; + const gen6_pte_t scratch_pte = vm->scratch[0]->encode; unsigned int pde = first_entry / GEN6_PTES; unsigned int pte = first_entry % GEN6_PTES; unsigned int num_entries = length / I915_GTT_PAGE_SIZE; @@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, const unsigned int count = min(num_entries, GEN6_PTES - pte); gen6_pte_t *vaddr; - GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); - num_entries -= count; GEM_BUG_ON(count > atomic_read(&pt->used)); @@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; - GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); + GEM_BUG_ON(!pd->entry[act_pt]); vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); do { @@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) mutex_unlock(&ppgtt->flush); } -static int gen6_alloc_va_range(struct i915_address_space *vm, - u64 start, u64 length) +static void gen6_alloc_va_range(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt, *alloc = NULL; + struct i915_page_table *pt; bool flush = false; u64 from = start; unsigned int pde; - int ret = 0; spin_lock(&pd->lock); gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); - if (px_base(pt) == px_base(&vm->scratch[1])) { + if (!pt) { spin_unlock(&pd->lock); - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } + pt = stash->pt[0]; + __i915_gem_object_pin_pages(pt->base); + i915_gem_object_make_unshrinkable(pt->base); - fill32_px(pt, vm->scratch[0].encode); + fill32_px(pt, vm->scratch[0]->encode); spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch[1]) { + if (!pd->entry[pde]) { + stash->pt[0] = pt->stash; + atomic_set(&pt->used, 0); pd->entry[pde] = pt; } else { - alloc = pt; pt = pd->entry[pde]; } @@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref) gen6_flush_pd(ppgtt, from, start); } - - goto out; - -unwind_out: - gen6_ppgtt_clear_range(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; } static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) { struct i915_address_space * const vm = &ppgtt->base.vm; - struct i915_page_directory * const pd = ppgtt->base.pd; int ret; - ret = setup_scratch_page(vm, __GFP_HIGHMEM); + ret = setup_scratch_page(vm); if (ret) return ret; - vm->scratch[0].encode = - vm->pte_encode(px_dma(&vm->scratch[0]), + vm->scratch[0]->encode = + vm->pte_encode(px_dma(vm->scratch[0]), I915_CACHE_NONE, PTE_READ_ONLY); - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { - cleanup_scratch_page(vm); - return -ENOMEM; + vm->scratch[1] = 
vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(vm->scratch[1])) + return PTR_ERR(vm->scratch[1]); + + ret = pin_pt_dma(vm, vm->scratch[1]); + if (ret) { + i915_gem_object_put(vm->scratch[1]); + return ret; } - fill32_px(&vm->scratch[1], vm->scratch[0].encode); - memset_p(pd->entry, &vm->scratch[1], I915_PDES); + fill32_px(vm->scratch[1], vm->scratch[0]->encode); return 0; } @@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) { struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; u32 pde; gen6_for_all_pdes(pt, pd, pde) - if (px_base(pt) != scratch) - free_px(&ppgtt->base.vm, pt); + if (pt) + free_pt(&ppgtt->base.vm, pt); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) mutex_destroy(&ppgtt->flush); mutex_destroy(&ppgtt->pin_mutex); - kfree(ppgtt->base.pd); + + free_pd(&ppgtt->base.vm, ppgtt->base.pd); } static int pd_vma_set_pages(struct i915_vma *vma) @@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int pd_vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) +static void pd_vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct gen6_ppgtt *ppgtt = vma->private; u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; - px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10; ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); - return 0; } static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; unsigned int pde; @@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) /* Free all no longer used page tables */ gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { - if (px_base(pt) == scratch || atomic_read(&pt->used)) + if (!pt || atomic_read(&pt->used)) continue; - free_px(&ppgtt->base.vm, pt); - pd->entry[pde] = scratch; + free_pt(&ppgtt->base.vm, pt); + pd->entry[pde] = NULL; } ppgtt->scan_for_unused_pt = false; @@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) return vma; } -int gen6_ppgtt_pin(struct i915_ppgtt *base) +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); int err; @@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) */ err = 0; if (!atomic_read(&ppgtt->pin_count)) - err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); if (!err) atomic_inc(&ppgtt->pin_count); mutex_unlock(&ppgtt->pin_mutex); @@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; 
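The pd_shift value set just above encodes how much address space one gen6 page-directory entry spans: a page table is a single 4K page of 32-bit PTEs, so it holds SZ_4K / sizeof(gen6_pte_t) = 1024 entries, each mapping a 4K page, i.e. 4 MiB per PDE, and therefore ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)) = 22. With I915_PDES (512) top-level entries that lines up with the 2 GiB gen6 ppGTT. A purely illustrative compile-time check of that arithmetic (not part of the patch; needs <linux/build_bug.h>, <linux/log2.h> and <linux/sizes.h>):

static inline void example_check_gen6_pd_shift(void)
{
	BUILD_BUG_ON(SZ_4K / sizeof(u32) != 1024);		 /* 32-bit PTEs per 4K page table */
	BUILD_BUG_ON(ilog2(SZ_4K * SZ_4K / sizeof(u32)) != 22);  /* 1024 PTEs * 4K = 4 MiB per PDE */
}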
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; @@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); + ppgtt->base.pd = __alloc_pd(I915_PDES); if (!ppgtt->base.pd) { err = -ENOMEM; goto err_free; @@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); err_pd: - kfree(ppgtt->base.pd); + free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 72e481806c96..3357228f3304 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -8,12 +8,15 @@ #include "intel_gtt.h" +struct i915_gem_ww_ctx; + struct gen6_ppgtt { struct i915_ppgtt base; struct mutex flush; struct i915_vma *vma; gen6_pte_t __iomem *pd_addr; + u32 pp_dir; atomic_t pin_count; struct mutex pin_mutex; @@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) (pt = i915_pt_entry(pd, iter), true); \ ++iter) -int gen6_ppgtt_pin(struct i915_ppgtt *base); +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww); void gen6_ppgtt_unpin(struct i915_ppgtt *base); void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); void gen6_ppgtt_enable(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 699125928272..eb64f474a78c 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, } while (pde++, --count); } - free_px(vm, pd); + free_px(vm, &pd->pt, lvl); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, struct i915_page_directory * const pd, u64 start, const u64 end, int lvl) { - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + const struct drm_i915_gem_object * const scratch = vm->scratch[lvl]; unsigned int idx, len; GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); @@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, vaddr = kmap_atomic_px(pt); memset64(vaddr + gen8_pd_index(start, 0), - vm->scratch[0].encode, + vm->scratch[0]->encode, count); kunmap_atomic(vaddr); @@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, } if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); + free_px(vm, pt, lvl); } while (idx++, --len); return start; @@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm, start, start + length, vm->top); } -static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, - struct i915_page_directory * const pd, - u64 * const start, const u64 end, int lvl) +static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_vm_pt_stash *stash, + struct i915_page_directory * const pd, + u64 * const start, const u64 end, int lvl) { - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; - struct i915_page_table *alloc = NULL; unsigned int idx, len; - int ret = 0; GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); @@ -297,49 +295,31 @@ 
static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", __func__, vm, lvl + 1, idx); - pt = fetch_and_zero(&alloc); - if (lvl) { - if (!pt) { - pt = &alloc_pd(vm)->pt; - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - fill_px(pt, vm->scratch[lvl].encode); - } else { - if (!pt) { - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - if (intel_vgpu_active(vm->i915) || - gen8_pt_count(*start, end) < I915_PDES) - fill_px(pt, vm->scratch[lvl].encode); - } + pt = stash->pt[!!lvl]; + __i915_gem_object_pin_pages(pt->base); + i915_gem_object_make_unshrinkable(pt->base); + + if (lvl || + gen8_pt_count(*start, end) < I915_PDES || + intel_vgpu_active(vm->i915)) + fill_px(pt, vm->scratch[lvl]->encode); spin_lock(&pd->lock); - if (likely(!pd->entry[idx])) + if (likely(!pd->entry[idx])) { + stash->pt[!!lvl] = pt->stash; + atomic_set(&pt->used, 0); set_pd_entry(pd, idx, pt); - else - alloc = pt, pt = pd->entry[idx]; + } else { + pt = pd->entry[idx]; + } } if (lvl) { atomic_inc(&pt->used); spin_unlock(&pd->lock); - ret = __gen8_ppgtt_alloc(vm, as_pd(pt), - start, end, lvl); - if (unlikely(ret)) { - if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); - goto out; - } + __gen8_ppgtt_alloc(vm, stash, + as_pd(pt), start, end, lvl); spin_lock(&pd->lock); atomic_dec(&pt->used); @@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, } } while (idx++, --len); spin_unlock(&pd->lock); -out: - if (alloc) - free_px(vm, alloc); - return ret; } -static int gen8_ppgtt_alloc(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_alloc(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length) { - u64 from; - int err; - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); GEM_BUG_ON(range_overflows(start, length, vm->total)); @@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, start >>= GEN8_PTE_SHIFT; length >>= GEN8_PTE_SHIFT; GEM_BUG_ON(length == 0); - from = start; - err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, - &start, start + length, vm->top); - if (unlikely(err && from != start)) - __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, - from, start, vm->top); - - return err; -} - -static __always_inline void -write_pte(gen8_pte_t *pte, const gen8_pte_t val) -{ - /* Magic delays? Or can we refine these to flush all in one pass? 
*/ - *pte = val; - wmb(); /* cpu to cache */ - clflush(pte); /* cache to memory */ - wmb(); /* visible to all */ + __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); } static __always_inline u64 @@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); - write_pte(&vaddr[gen8_pd_index(idx, 0)], - pte_encode | iter->dma); + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { @@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, pd = pdp->entry[gen8_pd_index(idx, 2)]; } + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); return idx; @@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, do { GEM_BUG_ON(iter->sg->length < page_size); - write_pte(&vaddr[index++], encode | iter->dma); + vaddr[index++] = encode | iter->dma; start += page_size; iter->dma += page_size; @@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, } } while (rem >= page_size && index < I915_PDES); + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); /* @@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { u16 i; - encode = vma->vm->scratch[0].encode; + encode = vma->vm->scratch[0]->encode; vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) @@ -608,27 +568,37 @@ static int gen8_init_scratch(struct i915_address_space *vm) GEM_BUG_ON(!clone->has_read_only); vm->scratch_order = clone->scratch_order; - memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); - px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ + for (i = 0; i <= vm->top; i++) + vm->scratch[i] = i915_gem_object_get(clone->scratch[i]); + return 0; } - ret = setup_scratch_page(vm, __GFP_HIGHMEM); + ret = setup_scratch_page(vm); if (ret) return ret; - vm->scratch[0].encode = - gen8_pte_encode(px_dma(&vm->scratch[0]), + vm->scratch[0]->encode = + gen8_pte_encode(px_dma(vm->scratch[0]), I915_CACHE_LLC, vm->has_read_only); for (i = 1; i <= vm->top; i++) { - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + struct drm_i915_gem_object *obj; + + obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(obj)) + goto free_scratch; + + ret = pin_pt_dma(vm, obj); + if (ret) { + i915_gem_object_put(obj); goto free_scratch; + } - fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); - vm->scratch[i].encode = - gen8_pde_encode(px_dma(&vm->scratch[i]), - I915_CACHE_LLC); + fill_px(obj, vm->scratch[i - 1]->encode); + obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC); + + vm->scratch[i] = obj; } return 0; @@ -649,12 +619,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { struct i915_page_directory *pde; + int err; pde = alloc_pd(vm); if (IS_ERR(pde)) return PTR_ERR(pde); - fill_px(pde, vm->scratch[1].encode); + err = pin_pt_dma(vm, pde->pt.base); + if (err) { + i915_gem_object_put(pde->pt.base); + free_pd(vm, pde); + return err; + } + + fill_px(pde, vm->scratch[1]->encode); set_pd_entry(pd, idx, pde); atomic_inc(px_used(pde)); /* keep pinned */ } @@ -668,21 +646,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm) { const 
unsigned int count = gen8_pd_top_count(vm); struct i915_page_directory *pd; + int err; - GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + GEM_BUG_ON(count > I915_PDES); - pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); + pd = __alloc_pd(count); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, px_base(pd)))) { - kfree(pd); - return ERR_PTR(-ENOMEM); + pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; } - fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + err = pin_pt_dma(vm, pd->pt.base); + if (err) + goto err_pd; + + fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count); atomic_inc(px_used(pd)); /* mark as pinned */ return pd; + +err_pd: + free_pd(vm, pd); + return ERR_PTR(err); } /* @@ -703,6 +692,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt_init(ppgtt, gt); ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; + ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); /* * From bdw, there is hw support for read-only pages in the PPGTT. @@ -714,12 +704,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) */ ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); - /* - * There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915)) - ppgtt->vm.pt_kmap_wc = true; + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; err = gen8_init_scratch(&ppgtt->vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 91786310c114..d8b206e53660 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,6 +28,8 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_breadcrumbs.h" +#include "intel_context.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->gt->irq_lock); } -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); + lockdep_assert_held(&b->irq_lock); + + if (!b->irq_engine || b->irq_armed) + return; + + if (!intel_gt_pm_get_if_awake(b->irq_engine->gt)) + return; + + /* + * The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + WRITE_ONCE(b->irq_armed, true); + + /* + * Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. 
+ */ + if (!b->irq_enabled++) + irq_enable(b->irq_engine); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ lockdep_assert_held(&b->irq_lock); + if (!b->irq_engine || !b->irq_armed) + return; + GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(engine); + irq_disable(b->irq_engine); WRITE_ONCE(b->irq_armed, false); - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async(b->irq_engine->gt); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +static void add_signaling_context(struct intel_breadcrumbs *b, + struct intel_context *ce) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - - if (!READ_ONCE(b->irq_armed)) - return; + intel_context_get(ce); + list_add_tail(&ce->signal_link, &b->signalers); + if (list_is_first(&ce->signal_link, &b->signalers)) + __intel_breadcrumbs_arm_irq(b); +} - spin_lock_irqsave(&b->irq_lock, flags); - if (b->irq_armed) - __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irqrestore(&b->irq_lock, flags); +static void remove_signaling_context(struct intel_breadcrumbs *b, + struct intel_context *ce) +{ + list_del(&ce->signal_link); + intel_context_put(ce); } static inline bool __request_completed(const struct i915_request *rq) @@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq) __maybe_unused static bool check_signal_order(struct intel_context *ce, struct i915_request *rq) { + if (rq->context != ce) + return false; + if (!list_is_last(&rq->signal_link, &ce->signals) && i915_seqno_passed(rq->fence.seqno, list_next_entry(rq, signal_link)->fence.seqno)) @@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence, static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (unlikely(intel_engine_is_virtual(engine))) - engine = intel_virtual_engine_get_sibling(engine, 0); - - intel_engine_add_retire(engine, tl); + if (b->irq_engine) + intel_engine_add_retire(b->irq_engine, tl); } -static void __signal_request(struct i915_request *rq, struct list_head *signals) +static bool __signal_request(struct i915_request *rq, struct list_head *signals) { - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - if (!__dma_fence_signal(&rq->fence)) - return; + if (!__dma_fence_signal(&rq->fence)) { + i915_request_put(rq); + return false; + } - i915_request_get(rq); list_add_tail(&rq->signal_link, signals); + return true; } static void signal_irq_work(struct irq_work *work) @@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work) spin_lock(&b->irq_lock); - if (b->irq_armed && list_empty(&b->signalers)) + if (list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); list_splice_init(&b->signaled_requests, &signal); @@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work) /* Advance the list to the first incomplete request */ __list_del_many(&ce->signals, pos); if (&ce->signals == pos) { /* now empty */ - list_del_init(&ce->signal_link); add_retire(b, ce->timeline); + remove_signaling_context(b, ce); } } } @@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work) } } -static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +struct intel_breadcrumbs * +intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, 
breadcrumbs); - - lockdep_assert_held(&b->irq_lock); - if (b->irq_armed) - return true; - - if (!intel_gt_pm_get_if_awake(engine->gt)) - return false; - - /* - * The breadcrumb irq will be disarmed on the interrupt after the - * waiters are signaled. This gives us a single interrupt window in - * which we can add a new waiter and avoid the cost of re-enabling - * the irq. - */ - WRITE_ONCE(b->irq_armed, true); - - /* - * Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. This is tracked - * by i915->gt.awake, we can forgo holding our own wakref - * for the interrupt as before i915->gt.awake is released (when - * the driver is idle) we disarm the breadcrumbs. - */ - - if (!b->irq_enabled++) - irq_enable(engine); + struct intel_breadcrumbs *b; - return true; -} - -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; + b = kzalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); INIT_LIST_HEAD(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); + + b->irq_engine = irq_engine; + + return b; } -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) +void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; unsigned long flags; + if (!b->irq_engine) + return; + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_enabled) - irq_enable(engine); + irq_enable(b->irq_engine); else - irq_disable(engine); + irq_disable(b->irq_engine); spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, - struct intel_context *ce) +void intel_breadcrumbs_park(struct intel_breadcrumbs *b) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; unsigned long flags; - spin_lock_irqsave(&b->irq_lock, flags); - if (!list_empty(&ce->signals)) { - struct i915_request *rq, *next; - - /* Queue for executing the signal callbacks in the irq_work */ - list_for_each_entry_safe(rq, next, &ce->signals, signal_link) { - GEM_BUG_ON(rq->engine != engine); - GEM_BUG_ON(!__request_completed(rq)); - - __signal_request(rq, &b->signaled_requests); - } + if (!READ_ONCE(b->irq_armed)) + return; - INIT_LIST_HEAD(&ce->signals); - list_del_init(&ce->signal_link); + spin_lock_irqsave(&b->irq_lock, flags); + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irqrestore(&b->irq_lock, flags); + if (!list_empty(&b->signalers)) irq_work_queue(&b->irq_work); - } - spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_breadcrumbs_free(struct intel_breadcrumbs *b) { + kfree(b); } -bool i915_request_enable_breadcrumb(struct i915_request *rq) +static void insert_breadcrumb(struct i915_request *rq, + struct intel_breadcrumbs *b) { - lockdep_assert_held(&rq->lock); - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) - return true; + struct intel_context *ce = rq->context; + struct list_head *pos; - if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->context; - struct list_head *pos; - - spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; - if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) - goto unlock; + i915_request_get(rq); - if (!__intel_breadcrumbs_arm_irq(b)) - goto unlock; + /* + * If the 
request is already completed, we can transfer it + * straight onto a signaled list, and queue the irq worker for + * its signal completion. + */ + if (__request_completed(rq)) { + if (__signal_request(rq, &b->signaled_requests)) + irq_work_queue(&b->irq_work); + return; + } + if (list_empty(&ce->signals)) { + add_signaling_context(b, ce); + pos = &ce->signals; + } else { /* * We keep the seqno in retirement order, so we can break * inside intel_engine_signal_breadcrumbs as soon as we've @@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) break; } - list_add(&rq->signal_link, pos); - if (pos == &ce->signals) /* catch transitions from empty list */ - list_move_tail(&ce->signal_link, &b->signalers); - GEM_BUG_ON(!check_signal_order(ce, rq)); + } + list_add(&rq->signal_link, pos); + GEM_BUG_ON(!check_signal_order(ce, rq)); + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* Check after attaching to irq, interrupt may have already fired. */ + if (__request_completed(rq)) + irq_work_queue(&b->irq_work); +} - set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); -unlock: +bool i915_request_enable_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b; + + /* Serialises with i915_request_retire() using rq->lock */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return true; + + /* + * Peek at i915_request_submit()/i915_request_unsubmit() status. + * + * If the request is not yet active (and not signaled), we will + * attach the breadcrumb later. + */ + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; + + /* + * rq->engine is locked by rq->engine->active.lock. That however + * is not known until after rq->engine has been dereferenced and + * the lock acquired. Hence we acquire the lock and then validate + * that rq->engine still matches the lock we hold for it. + * + * Here, we are using the breadcrumb lock as a proxy for the + * rq->engine->active.lock, and we know that since the breadcrumb + * will be serialised within i915_request_submit/i915_request_unsubmit, + * the engine cannot change while active as long as we hold the + * breadcrumb lock on that engine. + * + * From the dma_fence_enable_signaling() path, we are outside of the + * request submit/unsubmit path, and so we must be more careful to + * acquire the right lock. + */ + b = READ_ONCE(rq->engine)->breadcrumbs; + spin_lock(&b->irq_lock); + while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) { spin_unlock(&b->irq_lock); + b = READ_ONCE(rq->engine)->breadcrumbs; + spin_lock(&b->irq_lock); } - return !__request_completed(rq); + /* + * Now that we are finally serialised with request submit/unsubmit, + * [with b->irq_lock] and with i915_request_retire() [via checking + * SIGNALED with rq->lock] confirm the request is indeed active. If + * it is no longer active, the breadcrumb will be attached upon + * i915_request_submit(). 
+ */ + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + insert_breadcrumb(rq, b); + + spin_unlock(&b->irq_lock); + + return true; } void i915_request_cancel_breadcrumb(struct i915_request *rq) { - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - - lockdep_assert_held(&rq->lock); + struct intel_breadcrumbs *b = rq->engine->breadcrumbs; /* * We must wait for b->irq_lock so that we know the interrupt handler @@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) list_del(&rq->signal_link); if (list_empty(&ce->signals)) - list_del_init(&ce->signal_link); + remove_signaling_context(b, ce); clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + i915_request_put(rq); } spin_unlock(&b->irq_lock); } -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p) +static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_context *ce; struct i915_request *rq; - if (list_empty(&b->signalers)) - return; - drm_printf(p, "Signals:\n"); spin_lock_irq(&b->irq_lock); @@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, } spin_unlock_irq(&b->irq_lock); } + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) +{ + struct intel_breadcrumbs *b; + + b = engine->breadcrumbs; + if (!b) + return; + + drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed)); + if (!list_empty(&b->signalers)) + print_signals(b, p); +} diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h new file mode 100644 index 000000000000..ed3d1deabfbd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_BREADCRUMBS__ +#define __INTEL_BREADCRUMBS__ + +#include <linux/irq_work.h> + +#include "intel_engine_types.h" + +struct drm_printer; +struct i915_request; +struct intel_breadcrumbs; + +struct intel_breadcrumbs * +intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); +void intel_breadcrumbs_free(struct intel_breadcrumbs *b); + +void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); +void intel_breadcrumbs_park(struct intel_breadcrumbs *b); + +static inline void +intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) +{ + irq_work_queue(&engine->breadcrumbs->irq_work); +} + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + +#endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h new file mode 100644 index 000000000000..8e53b9942695 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_BREADCRUMBS_TYPES__ +#define __INTEL_BREADCRUMBS_TYPES__ + +#include <linux/irq_work.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +/* + * Rather than have every client wait upon all user interrupts, + * with the herd waking after every interrupt and each doing the + * heavyweight seqno dance, we delegate the task (of being the + * bottom-half of the user interrupt) to the first 
client. After + * every interrupt, we wake up one client, who does the heavyweight + * coherent seqno read and either goes back to sleep (if incomplete), + * or wakes up all the completed clients in parallel, before then + * transferring the bottom-half status to the next client in the queue. + * + * Compared to walking the entire list of waiters in a single dedicated + * bottom-half, we reduce the latency of the first waiter by avoiding + * a context switch, but incur additional coherent seqno reads when + * following the chain of request breadcrumbs. Since it is most likely + * that we have a single client waiting on each seqno, then reducing + * the overhead of waking that client is much preferred. + */ +struct intel_breadcrumbs { + spinlock_t irq_lock; /* protects the lists used in hardirq context */ + + /* Not all breadcrumbs are attached to physical HW */ + struct intel_engine_cs *irq_engine; + + struct list_head signalers; + struct list_head signaled_requests; + + struct irq_work irq_work; /* for use from inside irq_lock */ + + unsigned int irq_enabled; + + bool irq_armed; +}; + +#endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 52db2bde44a3..92a3f25c4006 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -93,57 +93,210 @@ static void intel_context_active_release(struct intel_context *ce) i915_active_release(&ce->active); } -int __intel_context_do_pin(struct intel_context *ce) +static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) +{ + unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; + int err; + + err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH); + if (err) + return err; + + err = i915_active_acquire(&vma->active); + if (err) + goto err_unpin; + + /* + * And mark it as a globally pinned object to let the shrinker know + * it cannot reclaim the object until we release it. 
+ */ + i915_vma_make_unshrinkable(vma); + vma->obj->mm.dirty = true; + + return 0; + +err_unpin: + i915_vma_unpin(vma); + return err; +} + +static void __context_unpin_state(struct i915_vma *vma) +{ + i915_vma_make_shrinkable(vma); + i915_active_release(&vma->active); + __i915_vma_unpin(vma); +} + +static int __ring_active(struct intel_ring *ring, + struct i915_gem_ww_ctx *ww) +{ + int err; + + err = intel_ring_pin(ring, ww); + if (err) + return err; + + err = i915_active_acquire(&ring->vma->active); + if (err) + goto err_pin; + + return 0; + +err_pin: + intel_ring_unpin(ring); + return err; +} + +static void __ring_retire(struct intel_ring *ring) +{ + i915_active_release(&ring->vma->active); + intel_ring_unpin(ring); +} + +static int intel_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) { int err; + CE_TRACE(ce, "active\n"); + + err = __ring_active(ce->ring, ww); + if (err) + return err; + + err = intel_timeline_pin(ce->timeline, ww); + if (err) + goto err_ring; + + if (!ce->state) + return 0; + + err = __context_pin_state(ce->state, ww); + if (err) + goto err_timeline; + + + return 0; + +err_timeline: + intel_timeline_unpin(ce->timeline); +err_ring: + __ring_retire(ce->ring); + return err; +} + +static void intel_context_post_unpin(struct intel_context *ce) +{ + if (ce->state) + __context_unpin_state(ce->state); + + intel_timeline_unpin(ce->timeline); + __ring_retire(ce->ring); +} + +int __intel_context_do_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + bool handoff = false; + void *vaddr; + int err = 0; + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) return err; } - err = i915_active_acquire(&ce->active); + /* + * We always pin the context/ring/timeline here, to ensure a pin + * refcount for __intel_context_active(), which prevent a lock + * inversion of ce->pin_mutex vs dma_resv_lock(). + */ + + err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); + if (!err && ce->ring->vma->obj) + err = i915_gem_object_lock(ce->ring->vma->obj, ww); + if (!err && ce->state) + err = i915_gem_object_lock(ce->state->obj, ww); + if (!err) + err = intel_context_pre_pin(ce, ww); if (err) return err; - if (mutex_lock_interruptible(&ce->pin_mutex)) { - err = -EINTR; - goto out_release; - } + err = i915_active_acquire(&ce->active); + if (err) + goto err_ctx_unpin; + + err = ce->ops->pre_pin(ce, ww, &vaddr); + if (err) + goto err_release; + + err = mutex_lock_interruptible(&ce->pin_mutex); + if (err) + goto err_post_unpin; if (unlikely(intel_context_is_closed(ce))) { err = -ENOENT; - goto out_unlock; + goto err_unlock; } if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { err = intel_context_active_acquire(ce); if (unlikely(err)) - goto out_unlock; + goto err_unlock; - err = ce->ops->pin(ce); - if (unlikely(err)) - goto err_active; + err = ce->ops->pin(ce, vaddr); + if (err) { + intel_context_active_release(ce); + goto err_unlock; + } CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n", i915_ggtt_offset(ce->ring->vma), ce->ring->head, ce->ring->tail); + handoff = true; smp_mb__before_atomic(); /* flush pin before it is visible */ atomic_inc(&ce->pin_count); } GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! 
*/ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - goto out_unlock; -err_active: - intel_context_active_release(ce); -out_unlock: +err_unlock: mutex_unlock(&ce->pin_mutex); -out_release: +err_post_unpin: + if (!handoff) + ce->ops->post_unpin(ce); +err_release: i915_active_release(&ce->active); +err_ctx_unpin: + intel_context_post_unpin(ce); + + /* + * Unlock the hwsp_ggtt object since it's shared. + * In principle we can unlock all the global state locked above + * since it's pinned and doesn't need fencing, and will + * thus remain resident until it is explicitly unpinned. + */ + i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj); + + return err; +} + +int __intel_context_do_pin(struct intel_context *ce) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = __intel_context_do_pin_ww(ce, &ww); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -154,6 +307,7 @@ void intel_context_unpin(struct intel_context *ce) CE_TRACE(ce, "unpin\n"); ce->ops->unpin(ce); + ce->ops->post_unpin(ce); /* * Once released, we may asynchronously drop the active reference. @@ -166,65 +320,6 @@ void intel_context_unpin(struct intel_context *ce) intel_context_put(ce); } -static int __context_pin_state(struct i915_vma *vma) -{ - unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; - int err; - - err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); - if (err) - return err; - - err = i915_active_acquire(&vma->active); - if (err) - goto err_unpin; - - /* - * And mark it as a globally pinned object to let the shrinker know - * it cannot reclaim the object until we release it. - */ - i915_vma_make_unshrinkable(vma); - vma->obj->mm.dirty = true; - - return 0; - -err_unpin: - i915_vma_unpin(vma); - return err; -} - -static void __context_unpin_state(struct i915_vma *vma) -{ - i915_vma_make_shrinkable(vma); - i915_active_release(&vma->active); - __i915_vma_unpin(vma); -} - -static int __ring_active(struct intel_ring *ring) -{ - int err; - - err = intel_ring_pin(ring); - if (err) - return err; - - err = i915_active_acquire(&ring->vma->active); - if (err) - goto err_pin; - - return 0; - -err_pin: - intel_ring_unpin(ring); - return err; -} - -static void __ring_retire(struct intel_ring *ring) -{ - i915_active_release(&ring->vma->active); - intel_ring_unpin(ring); -} - __i915_active_call static void __intel_context_retire(struct i915_active *active) { @@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active) intel_context_get_avg_runtime_ns(ce)); set_bit(CONTEXT_VALID_BIT, &ce->flags); - if (ce->state) - __context_unpin_state(ce->state); - - intel_timeline_unpin(ce->timeline); - __ring_retire(ce->ring); - + intel_context_post_unpin(ce); intel_context_put(ce); } static int __intel_context_active(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - int err; - - CE_TRACE(ce, "active\n"); intel_context_get(ce); - err = __ring_active(ce->ring); - if (err) - goto err_put; + /* everything should already be activated by intel_context_pre_pin() */ + GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active)); + __intel_ring_pin(ce->ring); - err = intel_timeline_pin(ce->timeline); - if (err) - goto err_ring; + __intel_timeline_pin(ce->timeline); - if (!ce->state) - return 0; - - err = __context_pin_state(ce->state); - if (err) - goto err_timeline; + if (ce->state) { + 
GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active)); + __i915_vma_pin(ce->state); + i915_vma_make_unshrinkable(ce->state); + } return 0; - -err_timeline: - intel_timeline_unpin(ce->timeline); -err_ring: - __ring_retire(ce->ring); -err_put: - intel_context_put(ce); - return err; } void @@ -382,15 +458,38 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce) { + struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; - err = intel_context_pin(ce); - if (unlikely(err)) - return ERR_PTR(err); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = intel_context_pin_ww(ce, &ww); + if (!err) { + rq = i915_request_create(ce); + intel_context_unpin(ce); + } else if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + rq = ERR_PTR(err); + } else { + rq = ERR_PTR(err); + } + + i915_gem_ww_ctx_fini(&ww); - rq = i915_request_create(ce); - intel_context_unpin(ce); + if (IS_ERR(rq)) + return rq; + + /* + * timeline->mutex should be the inner lock, but is used as outer lock. + * Hack around this to shut up lockdep in selftests.. + */ + lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie); + mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_); + mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); + rq->cookie = lockdep_pin_lock(&ce->timeline->mutex); return rq; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 07be021882cc..fda2eba81e22 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -25,6 +25,8 @@ ##__VA_ARGS__); \ } while (0) +struct i915_gem_ww_ctx; + void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); @@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) } int __intel_context_do_pin(struct intel_context *ce); +int __intel_context_do_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww); static inline bool intel_context_pin_if_active(struct intel_context *ce) { @@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce) return __intel_context_do_pin(ce); } +static inline int intel_context_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + if (likely(intel_context_pin_if_active(ce))) + return 0; + + return __intel_context_do_pin_ww(ce, ww); +} + static inline void __intel_context_pin(struct intel_context *ce) { GEM_BUG_ON(!intel_context_is_pinned(ce)); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 4954b0df4864..552cb57a2e8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -23,6 +23,7 @@ DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; +struct i915_gem_ww_ctx; struct i915_vma; struct intel_context; struct intel_ring; @@ -30,8 +31,10 @@ struct intel_ring; struct intel_context_ops { int (*alloc)(struct intel_context *ce); - int (*pin)(struct intel_context *ce); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); + int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); + void (*post_unpin)(struct intel_context *ce); void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h index a9249a23903a..7c3a1012e702 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); - -static inline void -intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - irq_work_queue(&engine->breadcrumbs.irq_work); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, - struct intel_context *ce); - -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p); - static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); @@ -357,4 +337,13 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +static inline bool +intel_engine_has_heartbeat(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + return false; + + return READ_ONCE(engine->props.heartbeat_interval_ms); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 26087dd79782..5bfb5f7ed02c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,6 +28,7 @@ #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" @@ -634,7 +635,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, else flags = PIN_HIGH; - return i915_ggtt_pin(vma, 0, flags); + return i915_ggtt_pin(vma, NULL, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) @@ -700,8 +701,13 @@ static int engine_setup_common(struct intel_engine_cs *engine) if (err) return err; + engine->breadcrumbs = intel_breadcrumbs_create(engine); + if (!engine->breadcrumbs) { + err = -ENOMEM; + goto err_status; + } + intel_engine_init_active(engine, ENGINE_PHYSICAL); - intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -716,6 +722,10 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_ctx_wa(engine); return 0; + +err_status: + cleanup_status_page(engine); + return err; } struct measure_breadcrumb { @@ -785,9 +795,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) } static struct intel_context * -create_kernel_context(struct intel_engine_cs *engine) +create_pinned_context(struct intel_engine_cs *engine, + unsigned int hwsp, + struct lock_class_key *key, + const char *name) { - static struct lock_class_key kernel; struct intel_context *ce; int err; @@ -796,6 +808,7 @@ create_kernel_context(struct intel_engine_cs *engine) return ce; __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); + ce->timeline = page_pack_bits(NULL, hwsp); err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { @@ -809,11 +822,20 @@ create_kernel_context(struct 
intel_engine_cs *engine) * should we need to inject GPU operations during their request * construction. */ - lockdep_set_class(&ce->timeline->mutex, &kernel); + lockdep_set_class_and_name(&ce->timeline->mutex, key, name); return ce; } +static struct intel_context * +create_kernel_context(struct intel_engine_cs *engine) +{ + static struct lock_class_key kernel; + + return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR, + &kernel, "kernel_context"); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -902,9 +924,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ cleanup_status_page(engine); + intel_breadcrumbs_free(engine->breadcrumbs); intel_engine_fini_retire(engine); - intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); if (engine->default_state) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 8ffdf676c0a0..5067d0524d4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -177,36 +177,82 @@ void intel_engine_init_heartbeat(struct intel_engine_cs *engine) INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); } +static int __intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + lockdep_assert_held(&ce->timeline->mutex); + GEM_BUG_ON(!intel_engine_has_preemption(engine)); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); + + return 0; +} + +static unsigned long set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + unsigned long old; + + old = xchg(&engine->props.heartbeat_interval_ms, delay); + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + + return old; +} + int intel_engine_set_heartbeat(struct intel_engine_cs *engine, unsigned long delay) { - int err; + struct intel_context *ce = engine->kernel_context; + int err = 0; - /* Send one last pulse before to cleanup persistent hogs */ - if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { - err = intel_engine_pulse(engine); - if (err) - return err; - } + if (!delay && !intel_engine_has_preempt_reset(engine)) + return -ENODEV; + + intel_engine_pm_get(engine); + + err = mutex_lock_interruptible(&ce->timeline->mutex); + if (err) + goto out_rpm; - WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + if (delay != engine->props.heartbeat_interval_ms) { + unsigned long saved = set_heartbeat(engine, delay); - if (intel_engine_pm_get_if_awake(engine)) { - if (delay) - intel_engine_unpark_heartbeat(engine); - else - intel_engine_park_heartbeat(engine); - intel_engine_pm_put(engine); + /* recheck current execution */ + if (intel_engine_has_preemption(engine)) { + err = __intel_engine_pulse(engine); + if (err) + set_heartbeat(engine, saved); + } } - return 0; + mutex_unlock(&ce->timeline->mutex); + +out_rpm: + 
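[Editorial aside] intel_engine_set_heartbeat() above installs the new interval with xchg() and restores the saved value if sending the pulse fails. In isolation, that swap / attempt / roll-back shape looks like the following stand-alone C; the interval variable and apply_new_interval() are invented for the example.

#include <stdatomic.h>

static _Atomic unsigned long interval_ms;

/* Stand-in for work that may reject the new setting. */
static int apply_new_interval(unsigned long ms)
{
	return ms > 60000 ? -1 : 0;	/* arbitrary failure condition */
}

static int set_interval(unsigned long ms)
{
	unsigned long saved = atomic_exchange(&interval_ms, ms);
	int err = apply_new_interval(ms);

	if (err)
		atomic_exchange(&interval_ms, saved);	/* roll back */
	return err;
}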
intel_engine_pm_put(engine); + return err; } int intel_engine_pulse(struct intel_engine_cs *engine) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; struct intel_context *ce = engine->kernel_context; - struct i915_request *rq; int err; if (!intel_engine_has_preemption(engine)) @@ -215,30 +261,12 @@ int intel_engine_pulse(struct intel_engine_cs *engine) if (!intel_engine_pm_get_if_awake(engine)) return 0; - if (mutex_lock_interruptible(&ce->timeline->mutex)) { - err = -EINTR; - goto out_rpm; - } - - intel_context_enter(ce); - rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); - intel_context_exit(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + err = -EINTR; + if (!mutex_lock_interruptible(&ce->timeline->mutex)) { + err = __intel_engine_pulse(engine); + mutex_unlock(&ce->timeline->mutex); } - __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - idle_pulse(engine, rq); - - __i915_request_commit(rq); - __i915_request_queue(rq, &attr); - GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); - err = 0; - -out_unlock: - mutex_unlock(&ce->timeline->mutex); -out_rpm: intel_engine_pm_put(engine); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 8ec3eecf3e39..f7b2e07e2229 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -6,6 +6,7 @@ #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" @@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf) call_idle_barriers(engine); /* cleanup after wedging */ intel_engine_park_heartbeat(engine); - intel_engine_disarm_breadcrumbs(engine); + intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. */ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8de92fd7d392..c400aaa2287b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -22,6 +22,7 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" +#include "intel_breadcrumbs_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_uncore.h" @@ -373,34 +374,8 @@ struct intel_engine_cs { */ struct ewma__engine_latency latency; - /* Rather than have every client wait upon all user interrupts, - * with the herd waking after every interrupt and each doing the - * heavyweight seqno dance, we delegate the task (of being the - * bottom-half of the user interrupt) to the first client. After - * every interrupt, we wake up one client, who does the heavyweight - * coherent seqno read and either goes back to sleep (if incomplete), - * or wakes up all the completed clients in parallel, before then - * transferring the bottom-half status to the next client in the queue. - * - * Compared to walking the entire list of waiters in a single dedicated - * bottom-half, we reduce the latency of the first waiter by avoiding - * a context switch, but incur additional coherent seqno reads when - * following the chain of request breadcrumbs. Since it is most likely - * that we have a single client waiting on each seqno, then reducing - * the overhead of waking that client is much preferred. 
- */ - struct intel_breadcrumbs { - spinlock_t irq_lock; - struct list_head signalers; - - struct list_head signaled_requests; - - struct irq_work irq_work; /* for use from inside irq_lock */ - - unsigned int irq_enabled; - - bool irq_armed; - } breadcrumbs; + /* Keep track of all the seqno used, a trail of breadcrumbs */ + struct intel_breadcrumbs *breadcrumbs; struct intel_engine_pmu { /** diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 99e28d9021e8..81c05f551b9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) { int ret; - stash_init(&i915->mm.wc_stash); - /* * Note that we use page colouring to enforce a guard page at the * end of the address space. This is required as the CS may prefetch @@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, /* Fill the allocated but "unused" space beyond the end of the buffer */ while (gte < end) - gen8_set_pte(gte++, vm->scratch[0].encode); + gen8_set_pte(gte++, vm->scratch[0]->encode); /* * We want to flush the TLBs only after we're certain all the PTE @@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, /* Fill the allocated but "unused" space beyond the end of the buffer */ while (gte < end) - iowrite32(vm->scratch[0].encode, gte++); + iowrite32(vm->scratch[0]->encode, gte++); /* * We want to flush the TLBs only after we're certain all the PTE @@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned int first_entry = start / I915_GTT_PAGE_SIZE; unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0].encode; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; @@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->scratch[0].encode; + scratch_pte = vm->scratch[0]->encode; for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); } @@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } -static int ggtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void ggtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) - return 0; + return; /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; @@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm, vm->insert_entries(vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - - return 0; } static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) @@ -568,31 +565,25 @@ err: return ret; } -static int aliasing_gtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void aliasing_gtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, 
+ struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { u32 pte_flags; - int ret; /* Currently applicable only to VLV */ pte_flags = 0; if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias; - - ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags); - if (ret) - return ret; - } + if (flags & I915_VMA_LOCAL_BIND) + ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, + stash, vma, cache_level, flags); if (flags & I915_VMA_GLOBAL_BIND) vm->insert_entries(vm, vma, cache_level, pte_flags); - - return 0; } static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, @@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) { + struct i915_vm_pt_stash stash = {}; struct i915_ppgtt *ppgtt; int err; @@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; } + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total); + if (err) + goto err_ppgtt; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + if (err) + goto err_stash; + /* * Note we only pre-allocate as far as the end of the global * GTT. On 48b / 4-level page-tables, the difference is very, * very significant! We have to preallocate as GVT/vgpu does * not like the page directory disappearing. */ - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); - if (err) - goto err_ppgtt; + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); ggtt->alias = ppgtt; ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; @@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + i915_vm_free_pt_stash(&ppgtt->vm, &stash); return 0; +err_stash: + i915_vm_free_pt_stash(&ppgtt->vm, &stash); err_ppgtt: i915_vm_put(&ppgtt->vm); return err; @@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) void i915_ggtt_driver_release(struct drm_i915_private *i915) { struct i915_ggtt *ggtt = &i915->ggtt; - struct pagevec *pvec; fini_aliasing_ppgtt(ggtt); intel_ggtt_fini_fences(ggtt); ggtt_cleanup_hw(ggtt); - - pvec = &i915->mm.wc_stash.pvec; - if (pvec->nr) { - set_pages_array_wb(pvec->pages, pvec->nr); - __pagevec_release(pvec); - } } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); + ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return ret; } - ggtt->vm.scratch[0].encode = - ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), + ggtt->vm.scratch[0]->encode = + ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), I915_CACHE_NONE, 0); return 0; @@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm); + free_scratch(vm); } static struct resource pci_resource(struct pci_dev *pdev, int bar) @@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) else size = gen8_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.total = (size / 
sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.cleanup = gen6_gmch_remove; ggtt->vm.insert_page = gen8_ggtt_insert_page; @@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) ggtt->vm.clear_range = gen6_ggtt_clear_range; @@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->gmadr = (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->do_idle_maps = needs_idle_maps(i915); ggtt->vm.insert_page = i915_ggtt_insert_page; ggtt->vm.insert_entries = i915_ggtt_insert_entries; @@ -1165,11 +1165,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) ggtt->invalidate(ggtt); } -static unsigned int clear_bind(struct i915_vma *vma) -{ - return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags); -} - void i915_ggtt_resume(struct i915_ggtt *ggtt) { struct i915_vma *vma; @@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - unsigned int was_bound = clear_bind(vma); + unsigned int was_bound = + atomic_read(&vma->flags) & I915_VMA_BIND_MASK; - WARN_ON(i915_vma_bind(vma, - obj ? obj->cache_level : 0, - was_bound, NULL)); + GEM_BUG_ON(!was_bound); + vma->ops->bind_vma(&ggtt->vm, NULL, vma, + obj ? obj->cache_level : 0, + was_bound); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e0755f1a904b..39b428c5049c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_ggtt_pin(vma, 0, PIN_HIGH); + ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (ret) goto err_unref; @@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt) /* We must be able to switch to something! */ GEM_BUG_ON(!engine->kernel_context); - err = intel_renderstate_init(&so, engine); - if (err) - goto out; - ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; } - rq = intel_context_create_request(ce); + err = intel_renderstate_init(&so, ce); + if (err) + goto err; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - intel_context_put(ce); - goto out; + goto err_fini; } err = intel_engine_emit_ctx_wa(rq); @@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt) err_rq: requests[id] = i915_request_get(rq); i915_request_add(rq); - intel_renderstate_fini(&so); - if (err) +err_fini: + intel_renderstate_fini(&so, ce); +err: + if (err) { + intel_context_put(ce); goto out; + } } /* Flush the default context image to memory, and enable powersaving. 
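[Editorial aside] init_aliasing_ppgtt() and __engines_record_defaults() above both rely on the kernel's usual goto-unwind style, where each error label releases exactly the resources acquired before the failure. A minimal self-contained version of that shape, with generic malloc'd resources standing in for the stash, context and request objects:

#include <stdlib.h>

static int setup_three_things(void **a, void **b, void **c)
{
	*a = malloc(64);
	if (!*a)
		return -1;

	*b = malloc(64);
	if (!*b)
		goto err_a;

	*c = malloc(64);
	if (!*c)
		goto err_b;

	return 0;		/* success: caller owns a, b and c */

err_b:
	free(*b);
err_a:
	free(*a);
	return -1;		/* failure: nothing is left allocated */
}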
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 418ae184cecf..104cb30e8c13 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node) { i915_gem_object_put(node->obj); i915_active_fini(&node->active); - kfree(node); + kfree_rcu(node, rcu); } -static void pool_free_work(struct work_struct *wrk) +static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep) { - struct intel_gt_buffer_pool *pool = - container_of(wrk, typeof(*pool), work.work); - struct intel_gt_buffer_pool_node *node, *next; - unsigned long old = jiffies - HZ; + struct intel_gt_buffer_pool_node *node, *stale = NULL; bool active = false; - LIST_HEAD(stale); int n; /* Free buffers that have not been used in the past second */ - spin_lock_irq(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { struct list_head *list = &pool->cache_list[n]; - /* Most recent at head; oldest at tail */ - list_for_each_entry_safe_reverse(node, next, list, link) { - if (time_before(node->age, old)) - break; + if (list_empty(list)) + continue; + + if (spin_trylock_irq(&pool->lock)) { + struct list_head *pos; + + /* Most recent at head; oldest at tail */ + list_for_each_prev(pos, list) { + unsigned long age; + + node = list_entry(pos, typeof(*node), link); + + age = READ_ONCE(node->age); + if (!age || jiffies - age < keep) + break; + + /* Check we are the first to claim this node */ + if (!xchg(&node->age, 0)) + break; - list_move(&node->link, &stale); + node->free = stale; + stale = node; + } + if (!list_is_last(pos, list)) + __list_del_many(pos, list); + + spin_unlock_irq(&pool->lock); } + active |= !list_empty(list); } - spin_unlock_irq(&pool->lock); - list_for_each_entry_safe(node, next, &stale, link) + while ((node = stale)) { + stale = stale->free; node_free(node); + } + + return active; +} + +static void pool_free_work(struct work_struct *wrk) +{ + struct intel_gt_buffer_pool *pool = + container_of(wrk, typeof(*pool), work.work); - if (active) + if (pool_free_older_than(pool, HZ)) schedule_delayed_work(&pool->work, round_jiffies_up_relative(HZ)); } @@ -108,9 +134,10 @@ static void pool_retire(struct i915_active *ref) /* Return this object to the shrinker pool */ i915_gem_object_make_purgeable(node->obj); + GEM_BUG_ON(node->age); spin_lock_irqsave(&pool->lock, flags); - node->age = jiffies; - list_add(&node->link, list); + list_add_rcu(&node->link, list); + WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ spin_unlock_irqrestore(&pool->lock, flags); schedule_delayed_work(&pool->work, @@ -129,6 +156,7 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz) if (!node) return ERR_PTR(-ENOMEM); + node->age = 0; node->pool = pool; i915_active_init(&node->active, pool_active, pool_retire); @@ -151,20 +179,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) struct intel_gt_buffer_pool *pool = >->buffer_pool; struct intel_gt_buffer_pool_node *node; struct list_head *list; - unsigned long flags; int ret; size = PAGE_ALIGN(size); list = bucket_for_size(pool, size); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry(node, list, link) { + rcu_read_lock(); + list_for_each_entry_rcu(node, list, link) { + unsigned long age; + if (node->obj->base.size < size) continue; - list_del(&node->link); - break; + + age = READ_ONCE(node->age); + if (!age) + continue; + + if 
(cmpxchg(&node->age, age, 0) == age) { + spin_lock_irq(&pool->lock); + list_del_rcu(&node->link); + spin_unlock_irq(&pool->lock); + break; + } } - spin_unlock_irqrestore(&pool->lock, flags); + rcu_read_unlock(); if (&node->link == list) { node = node_create(pool, size); @@ -192,28 +230,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt) INIT_DELAYED_WORK(&pool->work, pool_free_work); } -static void pool_free_imm(struct intel_gt_buffer_pool *pool) -{ - int n; - - spin_lock_irq(&pool->lock); - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - struct intel_gt_buffer_pool_node *node, *next; - struct list_head *list = &pool->cache_list[n]; - - list_for_each_entry_safe(node, next, list, link) - node_free(node); - INIT_LIST_HEAD(list); - } - spin_unlock_irq(&pool->lock); -} - void intel_gt_flush_buffer_pool(struct intel_gt *gt) { struct intel_gt_buffer_pool *pool = >->buffer_pool; do { - pool_free_imm(pool); + while (pool_free_older_than(pool, 0)) + ; } while (cancel_delayed_work_sync(&pool->work)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index e28bdda771ed..bcf1658c9633 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node { struct i915_active active; struct drm_i915_gem_object *obj; struct list_head link; - struct intel_gt_buffer_pool *pool; + union { + struct intel_gt_buffer_pool *pool; + struct intel_gt_buffer_pool_node *free; + struct rcu_head rcu; + }; unsigned long age; }; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index b05da68e52f4..257063a57101 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 2a72cce63fd9..3f1114b58b01 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -11,160 +11,24 @@ #include "intel_gt.h" #include "intel_gtt.h" -void stash_init(struct pagestash *stash) +struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) { - pagevec_init(&stash->pvec); - spin_lock_init(&stash->lock); -} - -static struct page *stash_pop_page(struct pagestash *stash) -{ - struct page *page = NULL; - - spin_lock(&stash->lock); - if (likely(stash->pvec.nr)) - page = stash->pvec.pages[--stash->pvec.nr]; - spin_unlock(&stash->lock); - - return page; -} - -static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) -{ - unsigned int nr; - - spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); - - nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); - memcpy(stash->pvec.pages + stash->pvec.nr, - pvec->pages + pvec->nr - nr, - sizeof(pvec->pages[0]) * nr); - stash->pvec.nr += nr; - - spin_unlock(&stash->lock); - - pvec->nr -= nr; -} - -static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) -{ - struct pagevec stack; - struct page *page; - if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) i915_gem_shrink_all(vm->i915); - page = stash_pop_page(&vm->free_pages); - if (page) - return page; - - if (!vm->pt_kmap_wc) - return alloc_page(gfp); - - /* Look in our global stash of WC pages... 
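[Editorial aside] The buffer-pool rework above lets lookups walk the cache under RCU and claim a node by atomically swapping its age stamp to zero, with zero reserved for nodes that are in use. The claim step on its own, over a plain array and with an invented node layout, can be sketched as:

#include <stdatomic.h>
#include <stddef.h>
#include <time.h>

struct cache_node {
	_Atomic unsigned long age;	/* 0 means "claimed / in use" */
	size_t size;
};

/* Claim the first idle node that is large enough, or return NULL. */
static struct cache_node *claim_node(struct cache_node *nodes, size_t n,
				     size_t want)
{
	for (size_t i = 0; i < n; i++) {
		unsigned long age = atomic_load(&nodes[i].age);

		if (!age || nodes[i].size < want)
			continue;
		/* Only the thread that swaps age to 0 owns the node. */
		if (atomic_compare_exchange_strong(&nodes[i].age, &age, 0))
			return &nodes[i];
	}
	return NULL;
}

/* Return a node to the cache; the stamp must never be written as 0. */
static void release_node(struct cache_node *node)
{
	unsigned long now = (unsigned long)time(NULL);

	atomic_store(&node->age, now ? now : 1);
}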
*/ - page = stash_pop_page(&vm->i915->mm.wc_stash); - if (page) - return page; - - /* - * Otherwise batch allocate pages to amortize cost of set_pages_wc. - * - * We have to be careful as page allocation may trigger the shrinker - * (via direct reclaim) which will fill up the WC stash underneath us. - * So we add our WB pages into a temporary pvec on the stack and merge - * them into the WC stash after all the allocations are complete. - */ - pagevec_init(&stack); - do { - struct page *page; - - page = alloc_page(gfp); - if (unlikely(!page)) - break; - - stack.pages[stack.nr++] = page; - } while (pagevec_space(&stack)); - - if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { - page = stack.pages[--stack.nr]; - - /* Merge spare WC pages to the global stash */ - if (stack.nr) - stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); - - /* Push any surplus WC pages onto the local VM stash */ - if (stack.nr) - stash_push_pagevec(&vm->free_pages, &stack); - } - - /* Return unwanted leftovers */ - if (unlikely(stack.nr)) { - WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); - __pagevec_release(&stack); - } - - return page; + return i915_gem_object_create_internal(vm->i915, sz); } -static void vm_free_pages_release(struct i915_address_space *vm, - bool immediate) +int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) { - struct pagevec *pvec = &vm->free_pages.pvec; - struct pagevec stack; - - lockdep_assert_held(&vm->free_pages.lock); - GEM_BUG_ON(!pagevec_count(pvec)); - - if (vm->pt_kmap_wc) { - /* - * When we use WC, first fill up the global stash and then - * only if full immediately free the overflow. - */ - stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); - - /* - * As we have made some room in the VM's free_pages, - * we can wait for it to fill again. Unless we are - * inside i915_address_space_fini() and must - * immediately release the pages! - */ - if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) - return; + int err; - /* - * We have to drop the lock to allow ourselves to sleep, - * so take a copy of the pvec and clear the stash for - * others to use it as we sleep. - */ - stack = *pvec; - pagevec_reinit(pvec); - spin_unlock(&vm->free_pages.lock); - - pvec = &stack; - set_pages_array_wb(pvec->pages, pvec->nr); - - spin_lock(&vm->free_pages.lock); - } + err = i915_gem_object_pin_pages(obj); + if (err) + return err; - __pagevec_release(pvec); -} - -static void vm_free_page(struct i915_address_space *vm, struct page *page) -{ - /* - * On !llc, we need to change the pages back to WB. We only do so - * in bulk, so we rarely need to change the page attributes here, - * but doing so requires a stop_machine() from deep inside arch/x86/mm. - * To make detection of the possible sleep more likely, use an - * unconditional might_sleep() for everybody. 
- */ - might_sleep(); - spin_lock(&vm->free_pages.lock); - while (!pagevec_space(&vm->free_pages.pvec)) - vm_free_pages_release(vm, false); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); - pagevec_add(&vm->free_pages.pvec, page); - spin_unlock(&vm->free_pages.lock); + i915_gem_object_make_unshrinkable(obj); + return 0; } void __i915_vm_close(struct i915_address_space *vm) @@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm) void i915_address_space_fini(struct i915_address_space *vm) { - spin_lock(&vm->free_pages.lock); - if (pagevec_count(&vm->free_pages.pvec)) - vm_free_pages_release(vm, true); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); - spin_unlock(&vm->free_pages.lock); - drm_mm_takedown(&vm->mm); - mutex_destroy(&vm->mutex); } @@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; - stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->bound_list); } @@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma) memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); } -static int __setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - gfp_t gfp) -{ - p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); - if (unlikely(!p->page)) - return -ENOMEM; - - p->daddr = dma_map_page_attrs(vm->dma, - p->page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { - vm_free_page(vm, p->page); - return -ENOMEM; - } - - return 0; -} - -int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +dma_addr_t __px_dma(struct drm_i915_gem_object *p) { - return __setup_page_dma(vm, p, __GFP_HIGHMEM); + GEM_BUG_ON(!i915_gem_object_has_pages(p)); + return sg_dma_address(p->mm.pages->sgl); } -void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +struct page *__px_page(struct drm_i915_gem_object *p) { - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - vm_free_page(vm, p->page); + GEM_BUG_ON(!i915_gem_object_has_pages(p)); + return sg_page(p->mm.pages->sgl); } void -fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) +fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count) { - kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); + struct page *page = __px_page(p); + void *vaddr; + + vaddr = kmap(page); + memset64(vaddr, val, count); + clflush_cache_range(vaddr, PAGE_SIZE); + kunmap(page); } -static void poison_scratch_page(struct page *page, unsigned long size) +static void poison_scratch_page(struct drm_i915_gem_object *scratch) { - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; + struct sgt_iter sgt; + struct page *page; + u8 val; - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + val = 0; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + val = POISON_FREE; - do { + for_each_sgt_page(page, sgt, scratch->mm.pages) { void *vaddr; vaddr = kmap(page); - memset(vaddr, POISON_FREE, PAGE_SIZE); + memset(vaddr, val, PAGE_SIZE); kunmap(page); - - page = pfn_to_page(page_to_pfn(page) + 1); - size -= PAGE_SIZE; - } while (size); + } } -int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) +int setup_scratch_page(struct i915_address_space *vm) { unsigned long size; @@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) */ size = I915_GTT_PAGE_SIZE_4K; if (i915_vm_is_4lvl(vm) && - 
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) size = I915_GTT_PAGE_SIZE_64K; - gfp |= __GFP_NOWARN; - } - gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; do { - unsigned int order = get_order(size); - struct page *page; - dma_addr_t addr; + struct drm_i915_gem_object *obj; - page = alloc_pages(gfp, order); - if (unlikely(!page)) + obj = vm->alloc_pt_dma(vm, size); + if (IS_ERR(obj)) goto skip; + if (pin_pt_dma(vm, obj)) + goto skip_obj; + + /* We need a single contiguous page for our scratch */ + if (obj->mm.page_sizes.sg < size) + goto skip_obj; + + /* And it needs to be correspondingly aligned */ + if (__px_dma(obj) & (size - 1)) + goto skip_obj; + /* * Use a non-zero scratch page for debugging. * @@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) * should it ever be accidentally used, the effect should be * fairly benign. */ - poison_scratch_page(page, size); - - addr = dma_map_page_attrs(vm->dma, - page, 0, size, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, addr))) - goto free_page; - - if (unlikely(!IS_ALIGNED(addr, size))) - goto unmap_page; - - vm->scratch[0].base.page = page; - vm->scratch[0].base.daddr = addr; - vm->scratch_order = order; + poison_scratch_page(obj); + + vm->scratch[0] = obj; + vm->scratch_order = get_order(size); return 0; -unmap_page: - dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); -free_page: - __free_pages(page, order); +skip_obj: + i915_gem_object_put(obj); skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; size = I915_GTT_PAGE_SIZE_4K; - gfp &= ~__GFP_NOWARN; } while (1); } -void cleanup_scratch_page(struct i915_address_space *vm) -{ - struct i915_page_dma *p = px_base(&vm->scratch[0]); - unsigned int order = vm->scratch_order; - - dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, - PCI_DMA_BIDIRECTIONAL); - __free_pages(p->page, order); -} - void free_scratch(struct i915_address_space *vm) { int i; - if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ - return; - - for (i = 1; i <= vm->top; i++) { - if (!px_dma(&vm->scratch[i])) - break; - cleanup_page_dma(vm, px_base(&vm->scratch[i])); - } - - cleanup_scratch_page(vm); + for (i = 0; i <= vm->top; i++) + i915_gem_object_put(vm->scratch[i]); } void gtt_write_workarounds(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index f2b75078e05f..c13c650ced22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -134,38 +134,29 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) +enum i915_cache_level; + +struct drm_i915_file_private; +struct drm_i915_gem_object; struct i915_fence_reg; +struct i915_vma; +struct intel_gt; #define for_each_sgt_daddr(__dp, __iter, __sgt) \ __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) -struct i915_page_dma { - struct page *page; +struct i915_page_table { + struct drm_i915_gem_object *base; union { - dma_addr_t daddr; - - /* - * For gen6/gen7 only. 
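[Editorial aside] setup_scratch_page() above tries a 64K scratch object first and only keeps it when it is contiguous and aligned to its own size, otherwise it retries with 4K. The loop shape in stand-alone C, using C11 aligned_alloc() instead of the GEM allocator (purely illustrative):

#include <stdint.h>
#include <stdlib.h>

#define SZ_4K	4096u
#define SZ_64K	65536u

static void *setup_scratch(size_t *out_size)
{
	size_t size = SZ_64K;

	for (;;) {
		void *p = aligned_alloc(size, size);

		/* Double-check the alignment requirement before accepting. */
		if (p && ((uintptr_t)p & (size - 1)) == 0) {
			*out_size = size;
			return p;
		}
		free(p);

		if (size == SZ_4K)
			return NULL;	/* even the small size failed */
		size = SZ_4K;		/* fall back and try again */
	}
}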
This is the offset in the GGTT - * where the page directory entries for PPGTT begin - */ - u32 ggtt_offset; + atomic_t used; + struct i915_page_table *stash; }; }; -struct i915_page_scratch { - struct i915_page_dma base; - u64 encode; -}; - -struct i915_page_table { - struct i915_page_dma base; - atomic_t used; -}; - struct i915_page_directory { struct i915_page_table pt; spinlock_t lock; - void *entry[512]; + void **entry; }; #define __px_choose_expr(x, type, expr, other) \ @@ -176,12 +167,14 @@ struct i915_page_directory { other) #define px_base(px) \ - __px_choose_expr(px, struct i915_page_dma *, __x, \ - __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ - __px_choose_expr(px, struct i915_page_table *, &__x->base, \ - __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ - (void)0)))) -#define px_dma(px) (px_base(px)->daddr) + __px_choose_expr(px, struct drm_i915_gem_object *, __x, \ + __px_choose_expr(px, struct i915_page_table *, __x->base, \ + __px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \ + (void)0))) + +struct page *__px_page(struct drm_i915_gem_object *p); +dma_addr_t __px_dma(struct drm_i915_gem_object *p); +#define px_dma(px) (__px_dma(px_base(px))) #define px_pt(px) \ __px_choose_expr(px, struct i915_page_table *, __x, \ @@ -189,19 +182,18 @@ struct i915_page_directory { (void)0)) #define px_used(px) (&px_pt(px)->used) -enum i915_cache_level; - -struct drm_i915_file_private; -struct drm_i915_gem_object; -struct i915_vma; -struct intel_gt; +struct i915_vm_pt_stash { + /* preallocated chains of page tables/directories */ + struct i915_page_table *pt[2]; +}; struct i915_vma_ops { /* Map an object into an address space with the given cache flags. */ - int (*bind_vma)(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); + void (*bind_vma)(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); /* * Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. @@ -213,13 +205,6 @@ struct i915_vma_ops { void (*clear_pages)(struct i915_vma *vma); }; -struct pagestash { - spinlock_t lock; - struct pagevec pvec; -}; - -void stash_init(struct pagestash *stash); - struct i915_address_space { struct kref ref; struct rcu_work rcu; @@ -256,33 +241,33 @@ struct i915_address_space { #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 - struct i915_page_scratch scratch[4]; - unsigned int scratch_order; - unsigned int top; - + struct drm_i915_gem_object *scratch[4]; /** * List of vma currently bound. 
*/ struct list_head bound_list; - struct pagestash free_pages; - /* Global GTT */ bool is_ggtt:1; - /* Some systems require uncached updates of the page directories */ - bool pt_kmap_wc:1; - /* Some systems support read-only mappings for GGTT and/or PPGTT */ bool has_read_only:1; + u8 top; + u8 pd_shift; + u8 scratch_order; + + struct drm_i915_gem_object * + (*alloc_pt_dma)(struct i915_address_space *vm, int sz); + u64 (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, u32 flags); /* Create a valid PTE */ #define PTE_READ_ONLY BIT(0) - int (*allocate_va_range)(struct i915_address_space *vm, - u64 start, u64 length); + void (*allocate_va_range)(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, @@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp, static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) { - struct i915_page_dma *pt = ppgtt->pd->entry[n]; + struct i915_page_table *pt = ppgtt->pd->entry[n]; - return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); + return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]); } void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); @@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); -int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); -void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); - -#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) +#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px))) void -fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); +fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); #define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) #define fill32_px(px, v) do { \ @@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); fill_px((px), v__ << 32 | v__); \ } while (0) -int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp); -void cleanup_scratch_page(struct i915_address_space *vm); +int setup_scratch_page(struct i915_address_space *vm); void free_scratch(struct i915_address_space *vm); +struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz); struct i915_page_table *alloc_pt(struct i915_address_space *vm); struct i915_page_directory *alloc_pd(struct i915_address_space *vm); -struct i915_page_directory *__alloc_pd(size_t sz); +struct i915_page_directory *__alloc_pd(int npde); -void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd); +int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj); -#define free_px(vm, px) free_pd(vm, px_base(px)) +void free_px(struct i915_address_space *vm, + struct i915_page_table *pt, int lvl); +#define free_pt(vm, px) free_px(vm, px, 0) +#define free_pd(vm, px) free_px(vm, px_pt(px), 1) void __set_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - struct i915_page_dma * const to, + struct i915_page_table *pt, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)); #define set_pd_entry(pd, idx, to) \ - __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) + __set_pd_entry((pd), (idx), px_pt(to), 
gen8_pde_encode) void clear_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - const struct i915_page_scratch * const scratch); + const struct drm_i915_gem_object * const scratch); bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, struct i915_page_table * const pt, - const struct i915_page_scratch * const scratch); + const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); int ggtt_set_pages(struct i915_vma *vma); int ppgtt_set_pages(struct i915_vma *vma); void clear_pages(struct i915_vma *vma); -int ppgtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); +void ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma); @@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt); void setup_private_pat(struct intel_uncore *uncore); +int i915_vm_alloc_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 size); +int i915_vm_pin_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash); +void i915_vm_free_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash); + static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9eeaca957a7e..0412a44f25f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -137,6 +137,7 @@ #include "i915_perf.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" @@ -1148,20 +1149,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) } else { struct intel_engine_cs *owner = rq->context->engine; - /* - * Decouple the virtual breadcrumb before moving it - * back to the virtual engine -- we don't want the - * request to complete in the background and try - * and cancel the breadcrumb on the virtual engine - * (instead of the old engine where it is linked)! - */ - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags)) { - spin_lock_nested(&rq->lock, - SINGLE_DEPTH_NESTING); - i915_request_cancel_breadcrumb(rq); - spin_unlock(&rq->lock); - } WRITE_ONCE(rq->engine, owner); owner->submit_request(rq); active = NULL; @@ -1819,16 +1806,31 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) +static void virtual_xfer_context(struct virtual_engine *ve, + struct intel_engine_cs *engine) { + unsigned int n; + + if (likely(engine == ve->siblings[0])) + return; + + GEM_BUG_ON(READ_ONCE(ve->context.inflight)); + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(ve->context.lrc_reg_state, + engine); + /* - * All the outstanding signals on ve->siblings[0] must have - * been completed, just pending the interrupt handler. As those - * signals still refer to the old sibling (via rq->engine), we must - * transfer those to the old irq_worker to keep our locking - * consistent. + * Move the bound engine to the top of the list for + * future execution. We then kick this tasklet first + * before checking others, so that we preferentially + * reuse this set of bound registers. 
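[Editorial aside] virtual_xfer_context(), introduced here (its sibling-swap loop appears in the hunk just below), promotes the engine that was actually used to slot 0 so the next submission preferentially reuses the already-programmed register state. The move-to-front step on its own, with an opaque pointer array instead of engine structures:

#include <stddef.h>

/* Swap the matching entry into slot 0 so it is tried first next time. */
static void move_to_front(void **entries, size_t count, void *hit)
{
	for (size_t n = 1; n < count; n++) {
		if (entries[n] == hit) {
			void *tmp = entries[0];

			entries[0] = entries[n];
			entries[n] = tmp;
			break;
		}
	}
}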
*/ - intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); + for (n = 1; n < ve->num_siblings; n++) { + if (ve->siblings[n] == engine) { + swap(ve->siblings[n], ve->siblings[0]); + break; + } + } } #define for_each_waiter(p__, rq__) \ @@ -2279,38 +2281,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(!(rq->execution_mask & engine->mask)); WRITE_ONCE(rq->engine, engine); - if (engine != ve->siblings[0]) { - u32 *regs = ve->context.lrc_reg_state; - unsigned int n; - - GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - - if (!intel_engine_has_relative_mmio(engine)) - virtual_update_register_offsets(regs, - engine); - - if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve); - + if (__i915_request_submit(rq)) { /* - * Move the bound engine to the top of the list - * for future execution. We then kick this - * tasklet first before checking others, so that - * we preferentially reuse this set of bound - * registers. + * Only after we confirm that we will submit + * this request (i.e. it has not already + * completed), do we want to update the context. + * + * This serves two purposes. It avoids + * unnecessary work if we are resubmitting an + * already completed request after timeslicing. + * But more importantly, it prevents us altering + * ve->siblings[] on an idle context, where + * we may be using ve->siblings[] in + * virtual_context_enter / virtual_context_exit. */ - for (n = 1; n < ve->num_siblings; n++) { - if (ve->siblings[n] == engine) { - swap(ve->siblings[n], - ve->siblings[0]); - break; - } - } - + virtual_xfer_context(ve, engine); GEM_BUG_ON(ve->siblings[0] != engine); - } - if (__i915_request_submit(rq)) { submit = true; last = rq; } @@ -3316,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce) { check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); +} +static void execlists_context_post_unpin(struct intel_context *ce) +{ i915_gem_object_unpin_map(ce->state->obj); } @@ -3478,20 +3468,24 @@ __execlists_update_reg_state(const struct intel_context *ce, } static int -__execlists_context_pin(struct intel_context *ce, - struct intel_engine_cs *engine) +execlists_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, void **vaddr) { - void *vaddr; - GEM_BUG_ON(!ce->state); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(engine->i915) | + *vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(ce->engine->i915) | I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + return PTR_ERR_OR_ZERO(*vaddr); +} + +static int +__execlists_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) +{ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -3499,9 +3493,9 @@ __execlists_context_pin(struct intel_context *ce, return 0; } -static int execlists_context_pin(struct intel_context *ce) +static int execlists_context_pin(struct intel_context *ce, void *vaddr) { - return __execlists_context_pin(ce, ce->engine); + return __execlists_context_pin(ce, ce->engine, vaddr); } static int execlists_context_alloc(struct intel_context *ce) @@ -3527,8 +3521,10 @@ static void execlists_context_reset(struct intel_context *ce) static const struct intel_context_ops execlists_context_ops = { .alloc = execlists_context_alloc, + .pre_pin = 
execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = execlists_context_unpin, + .post_unpin = execlists_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -3892,7 +3888,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_ggtt_pin(vma, 0, PIN_HIGH); + err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (err) goto err; @@ -4133,7 +4129,7 @@ static int execlists_resume(struct intel_engine_cs *engine) { intel_mocs_init_engine(engine); - intel_engine_reset_breadcrumbs(engine); + intel_breadcrumbs_reset(engine->breadcrumbs); if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { struct drm_printer p = drm_debug_printer(__func__); @@ -4562,7 +4558,7 @@ static int gen8_emit_flush_render(struct i915_request *request, vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0)) dc_flush_wa = true; } @@ -4764,14 +4760,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) intel_engine_mask_t aux_inv = 0; u32 cmd, *cs; + cmd = 4; + if (mode & EMIT_INVALIDATE) + cmd += 2; if (mode & EMIT_INVALIDATE) aux_inv = request->engine->mask & ~BIT(BCS0); + if (aux_inv) + cmd += 2 * hweight8(aux_inv) + 2; - cs = intel_ring_begin(request, - 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0)); + cs = intel_ring_begin(request, cmd); if (IS_ERR(cs)) return PTR_ERR(cs); + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(true); + cmd = MI_FLUSH_DW + 1; /* We always require a command barrier so that subsequent @@ -4804,6 +4807,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) } *cs++ = MI_NOOP; } + + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(false); + intel_ring_advance(request, cs); return 0; @@ -5302,6 +5309,14 @@ populate_lr_context(struct intel_context *ce, return 0; } +static struct intel_timeline *pinned_timeline(struct intel_context *ce) +{ + struct intel_timeline *tl = fetch_and_zero(&ce->timeline); + + return intel_timeline_create_from_engine(ce->engine, + page_unmask_bits(tl)); +} + static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -5332,19 +5347,17 @@ static int __execlists_context_alloc(struct intel_context *ce, goto error_deref_obj; } - if (!ce->timeline) { + if (!page_mask_bits(ce->timeline)) { struct intel_timeline *tl; - struct i915_vma *hwsp; /* * Use the static global HWSP for the kernel context, and * a dynamically allocated cacheline for everyone else. 
*/ - hwsp = NULL; - if (unlikely(intel_context_is_barrier(ce))) - hwsp = engine->status_page.vma; - - tl = intel_timeline_create(engine->gt, hwsp); + if (unlikely(ce->timeline)) + tl = pinned_timeline(ce); + else + tl = intel_timeline_create(engine->gt); if (IS_ERR(tl)) { ret = PTR_ERR(tl); goto error_deref_obj; @@ -5450,12 +5463,12 @@ static int virtual_context_alloc(struct intel_context *ce) return __execlists_context_alloc(ce, ve->siblings[0]); } -static int virtual_context_pin(struct intel_context *ce) +static int virtual_context_pin(struct intel_context *ce, void *vaddr) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); /* Note: we must use a real engine class for setting up reg state */ - return __execlists_context_pin(ce, ve->siblings[0]); + return __execlists_context_pin(ce, ve->siblings[0], vaddr); } static void virtual_context_enter(struct intel_context *ce) @@ -5483,8 +5496,10 @@ static void virtual_context_exit(struct intel_context *ce) static const struct intel_context_ops virtual_context_ops = { .alloc = virtual_context_alloc, + .pre_pin = execlists_context_pre_pin, .pin = virtual_context_pin, .unpin = execlists_context_unpin, + .post_unpin = execlists_context_post_unpin, .enter = virtual_context_enter, .exit = virtual_context_exit, @@ -5718,9 +5733,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); - intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); - ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */ ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -5737,6 +5750,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, intel_context_init(&ve->context, &ve->base); + ve->base.breadcrumbs = intel_breadcrumbs_create(NULL); + if (!ve->base.breadcrumbs) { + err = -ENOMEM; + goto err_put; + } + for (n = 0; n < count; n++) { struct intel_engine_cs *sibling = siblings[n]; diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index f0862e924d11..46d9aceda64c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm) if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, &pt->base))) { + pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pt->base)) { kfree(pt); return ERR_PTR(-ENOMEM); } @@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm) return pt; } -struct i915_page_directory *__alloc_pd(size_t sz) +struct i915_page_directory *__alloc_pd(int count) { struct i915_page_directory *pd; - pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); + pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); if (unlikely(!pd)) return NULL; + pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL); + if (unlikely(!pd->entry)) { + kfree(pd); + return NULL; + } + spin_lock_init(&pd->lock); return pd; } @@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - pd = __alloc_pd(sizeof(*pd)); + pd = __alloc_pd(I915_PDES); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, px_base(pd)))) { + pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pd->pt.base)) { + kfree(pd->entry); kfree(pd); return ERR_PTR(-ENOMEM); } @@ -55,41 
+64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm) return pd; } -void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) +void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl) { - cleanup_page_dma(vm, pd); - kfree(pd); + BUILD_BUG_ON(offsetof(struct i915_page_directory, pt)); + + if (lvl) { + struct i915_page_directory *pd = + container_of(pt, typeof(*pd), pt); + kfree(pd->entry); + } + + if (pt->base) + i915_gem_object_put(pt->base); + + kfree(pt); } static inline void -write_dma_entry(struct i915_page_dma * const pdma, +write_dma_entry(struct drm_i915_gem_object * const pdma, const unsigned short idx, const u64 encoded_entry) { - u64 * const vaddr = kmap_atomic(pdma->page); + u64 * const vaddr = kmap_atomic(__px_page(pdma)); vaddr[idx] = encoded_entry; + clflush_cache_range(&vaddr[idx], sizeof(u64)); kunmap_atomic(vaddr); } void __set_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - struct i915_page_dma * const to, + struct i915_page_table * const to, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) { /* Each thread pre-pins the pd, and we may have a thread per pde. */ - GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); + GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES); atomic_inc(px_used(pd)); pd->entry[idx] = to; - write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); + write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC)); } void clear_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - const struct i915_page_scratch * const scratch) + const struct drm_i915_gem_object * const scratch) { GEM_BUG_ON(atomic_read(px_used(pd)) == 0); @@ -102,7 +122,7 @@ bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, struct i915_page_table * const pt, - const struct i915_page_scratch * const scratch) + const struct drm_i915_gem_object * const scratch) { bool free = false; @@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) return ppgtt; } -int ppgtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +void ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { u32 pte_flags; - int err; if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - err = vm->allocate_va_range(vm, vma->node.start, vma->size); - if (err) - return err; - + vm->allocate_va_range(vm, stash, vma->node.start, vma->size); set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } @@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm, vm->insert_entries(vm, vma, cache_level, pte_flags); wmb(); - - return 0; } void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) @@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } +static unsigned long pd_count(u64 size, int shift) +{ + /* Beware later misalignment */ + return (size + 2 * (BIT_ULL(shift) - 1)) >> shift; +} + +int i915_vm_alloc_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 size) +{ + unsigned long count; + int shift, n; + + shift = vm->pd_shift; + if (!shift) + return 0; + + count = pd_count(size, shift); + while (count--) { + struct i915_page_table *pt; + + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + 
i915_vm_free_pt_stash(vm, stash); + return PTR_ERR(pt); + } + + pt->stash = stash->pt[0]; + stash->pt[0] = pt; + } + + for (n = 1; n < vm->top; n++) { + shift += ilog2(I915_PDES); /* Each PD holds 512 entries */ + count = pd_count(size, shift); + while (count--) { + struct i915_page_directory *pd; + + pd = alloc_pd(vm); + if (IS_ERR(pd)) { + i915_vm_free_pt_stash(vm, stash); + return PTR_ERR(pd); + } + + pd->pt.stash = stash->pt[1]; + stash->pt[1] = &pd->pt; + } + } + + return 0; +} + +int i915_vm_pin_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash) +{ + struct i915_page_table *pt; + int n, err; + + for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { + for (pt = stash->pt[n]; pt; pt = pt->stash) { + err = pin_pt_dma(vm, pt->base); + if (err) + return err; + } + } + + return 0; +} + +void i915_vm_free_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash) +{ + struct i915_page_table *pt; + int n; + + for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { + while ((pt = stash->pt[n])) { + stash->pt[n] = pt->stash; + free_px(vm, pt, n); + } + } +} + int ppgtt_set_pages(struct i915_vma *vma) { GEM_BUG_ON(vma->pages); vma->pages = vma->obj->mm.pages; - vma->page_sizes = vma->obj->mm.page_sizes; return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 1bfad589c63b..ea2a77c7b469 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_renderstate.h" +#include "gt/intel_context.h" #include "intel_ring.h" static const struct intel_renderstate_rodata * @@ -157,33 +158,47 @@ out: #undef OUT_BATCH int intel_renderstate_init(struct intel_renderstate *so, - struct intel_engine_cs *engine) + struct intel_context *ce) { - struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine = ce->engine; + struct drm_i915_gem_object *obj = NULL; int err; memset(so, 0, sizeof(*so)); so->rodata = render_state_get_rodata(engine); - if (!so->rodata) - return 0; + if (so->rodata) { + if (so->rodata->batch_items * 4 > PAGE_SIZE) + return -EINVAL; + + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); + goto err_obj; + } + } - if (so->rodata->batch_items * 4 > PAGE_SIZE) - return -EINVAL; + i915_gem_ww_ctx_init(&so->ww, true); +retry: + err = intel_context_pin_ww(ce, &so->ww); + if (err) + goto err_fini; - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + /* return early if there's nothing to setup */ + if (!err && !so->rodata) + return 0; - so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(so->vma)) { - err = PTR_ERR(so->vma); - goto err_obj; - } + err = i915_gem_object_lock(so->vma->obj, &so->ww); + if (err) + goto err_context; err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_obj; + goto err_context; err = render_state_setup(so, engine->i915); if (err) @@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so, err_unpin: i915_vma_unpin(so->vma); +err_context: + intel_context_unpin(ce); +err_fini: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&so->ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&so->ww); err_obj: - i915_gem_object_put(obj); + if (obj) + i915_gem_object_put(obj); so->vma = NULL; return 
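The page-table stash helpers introduced above are the allocation half of the ww-locking rework: page tables are reserved up front, their backing store is pinned while the ww context is held, and whatever the bind does not consume is handed back. A minimal sketch of how a caller might drive them, assuming only the functions shown in this hunk (the wrapper name and error handling are illustrative, not the actual i915_vma_pin path):

static int example_bind_with_stash(struct i915_address_space *vm,
                                   struct i915_vma *vma,
                                   enum i915_cache_level cache_level,
                                   u32 flags)
{
        struct i915_vm_pt_stash stash = {};
        int err;

        /* reserve worst-case page tables before any binding work */
        err = i915_vm_alloc_pt_stash(vm, &stash, vma->size);
        if (err)
                return err;

        /* pin the backing store for the preallocated tables */
        err = i915_vm_pin_pt_stash(vm, &stash);
        if (err)
                goto out;

        /* the bind pulls page tables out of the stash as it needs them */
        ppgtt_bind_vma(vm, &stash, vma, cache_level, flags);
out:
        /* return any preallocations the bind did not consume */
        i915_vm_free_pt_stash(vm, &stash);
        return err;
}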
err; } @@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; - i915_vma_lock(so->vma); err = i915_request_await_object(rq, so->vma->obj, false); if (err == 0) err = i915_vma_move_to_active(so->vma, rq, 0); - i915_vma_unlock(so->vma); if (err) return err; @@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so, return 0; } -void intel_renderstate_fini(struct intel_renderstate *so) +void intel_renderstate_fini(struct intel_renderstate *so, + struct intel_context *ce) { - i915_vma_unpin_and_release(&so->vma, 0); + if (so->vma) { + i915_vma_unpin(so->vma); + i915_vma_close(so->vma); + } + + intel_context_unpin(ce); + i915_gem_ww_ctx_fini(&so->ww); + + if (so->vma) + i915_gem_object_put(so->vma->obj); } diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 5700be69a05a..713aa1e86c80 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -25,9 +25,10 @@ #define _INTEL_RENDERSTATE_H_ #include <linux/types.h> +#include "i915_gem.h" struct i915_request; -struct intel_engine_cs; +struct intel_context; struct i915_vma; struct intel_renderstate_rodata { @@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; struct intel_renderstate { + struct i915_gem_ww_ctx ww; const struct intel_renderstate_rodata *rodata; struct i915_vma *vma; u32 batch_offset; @@ -58,9 +60,10 @@ struct intel_renderstate { }; int intel_renderstate_init(struct intel_renderstate *so, - struct intel_engine_cs *engine); + struct intel_context *ce); int intel_renderstate_emit(struct intel_renderstate *so, struct i915_request *rq); -void intel_renderstate_fini(struct intel_renderstate *so); +void intel_renderstate_fini(struct intel_renderstate *so, + struct intel_context *ce); #endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 46a5ceffc22f..ac36b67fb46b 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -15,6 +15,7 @@ #include "i915_drv.h" #include "i915_gpu_error.h" #include "i915_irq.h" +#include "intel_breadcrumbs.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index bdb324167ef3..4034a4bac7f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) return space; } -int intel_ring_pin(struct intel_ring *ring) +void __intel_ring_pin(struct intel_ring *ring) +{ + GEM_BUG_ON(!atomic_read(&ring->pin_count)); + atomic_inc(&ring->pin_count); +} + +int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) { struct i915_vma *vma = ring->vma; unsigned int flags; @@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring) else flags |= PIN_HIGH; - ret = i915_ggtt_pin(vma, 0, flags); + ret = i915_ggtt_pin(vma, ww, 0, flags); if (unlikely(ret)) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index cc0ebca65167..1700579bdc93 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); -int 
intel_ring_pin(struct intel_ring *ring); +void __intel_ring_pin(struct intel_ring *ring); +int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 898593ca4889..16b48e72c369 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" #include "intel_reset.h" @@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm) return vm; } +static u32 pp_dir(struct i915_address_space *vm) +{ + return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir; +} + static void set_pp_dir(struct intel_engine_cs *engine) { struct i915_address_space *vm = vm_alias(engine->gt->vm); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); - ENGINE_WRITE(engine, RING_PP_DIR_BASE, - px_base(ppgtt->pd)->ggtt_offset << 10); + ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm)); } } @@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine) else ring_setup_status_page(engine); - intel_engine_reset_breadcrumbs(engine); + intel_breadcrumbs_reset(engine->breadcrumbs); /* Enforce ordering by reading HEAD register back */ ENGINE_POSTING_READ(engine, RING_HEAD); @@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref) intel_context_free(ce); } -static int __context_pin_ppgtt(struct intel_context *ce) +static int ring_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **unused) { struct i915_address_space *vm; int err = 0; vm = vm_alias(ce->vm); if (vm) - err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); + err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww); return err; } @@ -497,6 +502,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce) static void ring_context_unpin(struct intel_context *ce) { +} + +static void ring_context_post_unpin(struct intel_context *ce) +{ __context_unpin_ppgtt(ce); } @@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce) return 0; } -static int ring_context_pin(struct intel_context *ce) +static int ring_context_pin(struct intel_context *ce, void *unused) { - return __context_pin_ppgtt(ce); + return 0; } static void ring_context_reset(struct intel_context *ce) @@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce) static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, .unpin = ring_context_unpin, + .post_unpin = ring_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = { }; static int load_pd_dir(struct i915_request *rq, - const struct i915_ppgtt *ppgtt, + struct i915_address_space *vm, u32 valid) { const struct intel_engine_cs * const engine = rq->engine; @@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; + *cs++ = pp_dir(vm); /* Stall until the page table load is complete? 
*/ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; @@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) * post-sync op, this extra pass appears vital before a * mm switch! */ - ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); + ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G); if (ret) return ret; @@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - timeline = intel_timeline_create(engine->gt, engine->status_page.vma); + timeline = intel_timeline_create_from_engine(engine, + I915_GEM_HWS_SEQNO_ADDR); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } GEM_BUG_ON(timeline->has_initial_breadcrumb); - err = intel_timeline_pin(timeline); + err = intel_timeline_pin(timeline, NULL); if (err) goto err_timeline; @@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) goto err_timeline_unpin; } - err = intel_ring_pin(ring); + err = intel_ring_pin(ring, NULL); if (err) goto err_ring; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 97ba14ad52e4..e6a00eea0631 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -7,6 +7,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 46d20f5f3ddc..a2f74cefe4c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -215,7 +215,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl) static int intel_timeline_init(struct intel_timeline *timeline, struct intel_gt *gt, - struct i915_vma *hwsp) + struct i915_vma *hwsp, + unsigned int offset) { void *vaddr; @@ -246,8 +247,7 @@ static int intel_timeline_init(struct intel_timeline *timeline, vaddr = page_mask_bits(cl->vaddr); } else { - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - + timeline->hwsp_offset = offset; vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -297,7 +297,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline) } struct intel_timeline * -intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) +__intel_timeline_create(struct intel_gt *gt, + struct i915_vma *global_hwsp, + unsigned int offset) { struct intel_timeline *timeline; int err; @@ -306,7 +308,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) if (!timeline) return ERR_PTR(-ENOMEM); - err = intel_timeline_init(timeline, gt, global_hwsp); + err = intel_timeline_init(timeline, gt, global_hwsp, offset); if (err) { kfree(timeline); return ERR_PTR(err); @@ -315,14 +317,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) return timeline; } -int intel_timeline_pin(struct intel_timeline *tl) +void __intel_timeline_pin(struct intel_timeline *tl) +{ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + atomic_inc(&tl->pin_count); +} + +int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) { int err; if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; - err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH); + err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH); if (err) return err; @@ -465,7 +473,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, goto err_rollback; } - err = i915_ggtt_pin(vma, 
0, PIN_HIGH); + err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (err) { __idle_hwsp_free(vma->private, cacheline); goto err_rollback; @@ -484,7 +492,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. */ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); + err = i915_active_ref(&tl->hwsp_cacheline->active, + tl->fence_context, + &rq->fence); if (err) goto err_cacheline; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 4298b9ac7327..9882cd911d8e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -29,10 +29,27 @@ #include "i915_active.h" #include "i915_syncmap.h" -#include "gt/intel_timeline_types.h" +#include "intel_timeline_types.h" struct intel_timeline * -intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); +__intel_timeline_create(struct intel_gt *gt, + struct i915_vma *global_hwsp, + unsigned int offset); + +static inline struct intel_timeline * +intel_timeline_create(struct intel_gt *gt) +{ + return __intel_timeline_create(gt, NULL, 0); +} + +static inline struct intel_timeline * +intel_timeline_create_from_engine(struct intel_engine_cs *engine, + unsigned int offset) +{ + return __intel_timeline_create(engine->gt, + engine->status_page.vma, + offset); +} static inline struct intel_timeline * intel_timeline_get(struct intel_timeline *timeline) @@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); } -int intel_timeline_pin(struct intel_timeline *tl); +void __intel_timeline_pin(struct intel_timeline *tl); +int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww); void intel_timeline_enter(struct intel_timeline *tl); int intel_timeline_get_seqno(struct intel_timeline *tl, struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5726cd0a37e0..6c580d0d9ea8 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -52,6 +52,37 @@ * - Public functions to init or apply the given workaround type. */ +/* + * KBL revision ID ordering is bizarre; higher revision ID's map to lower + * steppings in some cases. So rather than test against the revision ID + * directly, let's map that into our own range of increasing ID's that we + * can test against in a regular manner. 
+ */ + +const struct i915_rev_steppings kbl_revids[] = { + [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 }, + [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 }, + [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 }, + [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 }, + [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 }, + [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 }, + [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 }, + [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 }, +}; + +const struct i915_rev_steppings tgl_uy_revids[] = { + [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_A0 }, + [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_C0 }, + [2] = { .gt_stepping = TGL_REVID_B1, .disp_stepping = TGL_REVID_C0 }, + [3] = { .gt_stepping = TGL_REVID_C0, .disp_stepping = TGL_REVID_D0 }, +}; + +/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ +const struct i915_rev_steppings tgl_revids[] = { + [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_B0 }, + [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_D0 }, +}; + static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) { wal->name = name; @@ -470,7 +501,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) + if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -596,8 +627,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } -static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { /* * Wa_1409142259:tgl @@ -607,12 +638,28 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, * Wa_1409207793:tgl * Wa_1409178076:tgl * Wa_1408979724:tgl + * Wa_14010443199:rkl + * Wa_14010698770:rkl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + /* WaDisableGPGPUMidThreadPreemption:gen12 */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); +} + +static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + gen12_ctx_workarounds_init(engine, wal); + /* - * Wa_1604555607:gen12 and Wa_1608008084:gen12 + * Wa_1604555607:tgl,rkl + * + * Note that the implementation of this workaround is further modified + * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. * FF_MODE2 register will return the wrong value when read. The default * value for this register is zero for all fields and there are no bit * masks. 
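The kbl_revids[], tgl_uy_revids[] and tgl_revids[] tables defined above exist so that workaround code can compare steppings rather than raw revision IDs. A hypothetical sketch of the lookup they enable (the helper name is illustrative; the series itself consumes these tables through the IS_KBL_GT_REVID()/IS_TGL_UY_GT_REVID()-style checks used in the workaround code below, whose definitions are not part of this hunk):

static inline u8 example_kbl_gt_stepping(struct drm_i915_private *i915)
{
        unsigned int revid = INTEL_REVID(i915); /* raw PCI revision ID */

        /* clamp unknown or future revision IDs to the newest known entry */
        if (revid >= ARRAY_SIZE(kbl_revids))
                revid = ARRAY_SIZE(kbl_revids) - 1;

        return kbl_revids[revid].gt_stepping;
}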
So instead of doing a RMW we should just write the GS Timer @@ -623,11 +670,6 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, 0); - - /* WaDisableGPGPUMidThreadPreemption:tgl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); } static void @@ -642,8 +684,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); - if (IS_GEN(i915, 12)) + if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) tgl_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 12)) + gen12_ctx_workarounds_init(engine, wal); else if (IS_GEN(i915, 11)) icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) @@ -995,7 +1039,7 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen9_gt_workarounds_init(i915, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -1176,18 +1220,25 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +gen12_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) { wa_init_mcr(i915, wal); +} + +static void +tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen12_gt_workarounds_init(i915, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1196,8 +1247,10 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_GEN(i915, 12)) + if (IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 12)) + gen12_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 11)) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) @@ -1620,7 +1673,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { /* * Wa_1607138336:tgl * Wa_1607063988:tgl @@ -1630,18 +1683,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); /* - * Wa_1607030317:tgl - * Wa_1607186500:tgl - * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2 - * of then says it is fixed on B0 the other one says it is - * permanent - */ - wa_masked_en(wal, - GEN6_RC_SLEEP_PSMI_CONTROL, - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE); - - /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) */ @@ -1654,22 +1695,17 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) VSUNIT_CLKGATE_DIS_TGL); } - if (IS_TIGERLAKE(i915)) { - /* Wa_1606931601:tgl */ + if 
(IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* Wa_1606931601:tgl,rkl */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); - /* Wa_1409804808:tgl */ + /* Wa_1409804808:tgl,rkl */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); - /* Wa_1606700617:tgl */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); - /* * Wa_1409085225:tgl - * Wa_14010229206:tgl + * Wa_14010229206:tgl,rkl */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); @@ -1677,9 +1713,37 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1407928979:tgl A* * Wa_18011464164:tgl B0+ * Wa_22010931296:tgl B0+ + * Wa_14010919138:rkl,tgl */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* + * Wa_1607030317:tgl + * Wa_1607186500:tgl + * Wa_1607297627:tgl,rkl there are multiple entries for this + * WA in the BSpec; some indicate this is an A0-only WA, + * others indicate it applies to all steppings. + */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + + /* + * Wa_1606700617:tgl + * Wa_22010271021:tgl,rkl + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + + if (IS_GEN(i915, 12)) { + /* Wa_1406941453:gen12 */ + wa_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_GEN(i915, 11)) { @@ -1898,7 +1962,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { + if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); @@ -2045,6 +2109,7 @@ static int engine_wa_list_verify(struct intel_context *ce, const struct i915_wa *wa; struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; unsigned int i; u32 *results; int err; @@ -2057,29 +2122,34 @@ static int engine_wa_list_verify(struct intel_context *ce, return PTR_ERR(vma); intel_engine_pm_get(ce->engine); - rq = intel_context_create_request(ce); - intel_engine_pm_put(ce->engine); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err == 0) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err_pm; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_vma; + goto err_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) { - i915_request_add(rq); - goto err_vma; - } - - err = wa_list_srm(rq, wal, vma); - if (err) - goto err_vma; + if (err == 0) + err = wa_list_srm(rq, wal, vma); i915_request_get(rq); + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); + + if (err) + goto err_rq; + if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; goto err_rq; @@ -2104,7 +2174,16 @@ static int engine_wa_list_verify(struct intel_context *ce, err_rq: i915_request_put(rq); -err_vma: +err_unpin: + intel_context_unpin(ce); +err_pm: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); i915_vma_unpin(vma); i915_vma_put(vma); return err; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index b8dd3cbc8696..dfd1cfb8a7ec 100644 --- 
a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce) { } +static void mock_context_post_unpin(struct intel_context *ce) +{ +} + static void mock_context_destroy(struct kref *ref) { struct intel_context *ce = container_of(ref, typeof(*ce), ref); @@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce) if (!ce->ring) return -ENOMEM; - GEM_BUG_ON(ce->timeline); - ce->timeline = intel_timeline_create(ce->engine->gt, NULL); + ce->timeline = intel_timeline_create(ce->engine->gt); if (IS_ERR(ce->timeline)) { kfree(ce->engine); return PTR_ERR(ce->timeline); @@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce) return 0; } -static int mock_context_pin(struct intel_context *ce) +static int mock_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, void **unused) +{ + return 0; +} + +static int mock_context_pin(struct intel_context *ce, void *unused) { return 0; } @@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce) static const struct intel_context_ops mock_context_ops = { .alloc = mock_context_alloc, + .pre_pin = mock_context_pre_pin, .pin = mock_context_pin, .unpin = mock_context_unpin, + .post_unpin = mock_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); + intel_breadcrumbs_free(engine->breadcrumbs); + intel_context_unpin(engine->kernel_context); intel_context_put(engine->kernel_context); intel_engine_fini_retire(engine); - intel_engine_fini_breadcrumbs(engine); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine) struct intel_context *ce; intel_engine_init_active(engine, ENGINE_MOCK); - intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); + engine->breadcrumbs = intel_breadcrumbs_create(NULL); + if (!engine->breadcrumbs) + return -ENOMEM; + ce = create_kernel_context(engine); if (IS_ERR(ce)) goto err_breadcrumbs; + /* We insist the kernel context is using the status_page */ + engine->status_page.vma = ce->timeline->hwsp_ggtt; + engine->kernel_context = ce; return 0; err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); + intel_breadcrumbs_free(engine->breadcrumbs); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 52af1cee9a94..1f4020e906a8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce) } while (!err); mutex_unlock(&tl->mutex); + /* Wait for all barriers to complete (remote CPU) before we check */ + i915_active_unlock_wait(&ce->active); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 73243ba59c7d..e73854dd2fe0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -47,7 +47,10 @@ static int pulse_active(struct i915_active *active) static void pulse_free(struct kref *kref) { - kfree(container_of(kref, struct pulse, kref)); + struct pulse *p = container_of(kref, typeof(*p), kref); + + 
i915_active_fini(&p->active); + kfree(p); } static void pulse_put(struct pulse *p) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3fc5de961280..95d41c01d0e0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine, i915_gem_object_put(obj); intel_context_put(ce); - rq->client_link.next = &(*prev)->client_link; + rq->mock.link.next = &(*prev)->mock.link; *prev = rq; return 0; @@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg) } while (rq) { /* wait for each rq from highest to lowest prio */ - struct i915_request *n = - list_next_entry(rq, client_link); + struct i915_request *n = list_next_entry(rq, mock.link); if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = @@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) return vma; } - err = i915_ggtt_pin(vma, 0, 0); + err = i915_ggtt_pin(vma, NULL, 0, 0); if (err) { i915_vma_put(vma); return ERR_PTR(err); @@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine, { struct intel_context *ce; struct i915_request *rq; + struct i915_gem_ww_ctx ww; enum { RING_START_IDX = 0, RING_TAIL_IDX, @@ -5011,7 +5011,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine, if (IS_ERR(ce)) return PTR_ERR(ce); - err = intel_context_pin(ce); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(scratch->obj, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); if (err) goto err_put; @@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine, *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); *cs++ = 0; - i915_vma_lock(scratch); err = i915_request_await_object(rq, scratch->obj, true); if (!err) err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(scratch); i915_request_get(rq); i915_request_add(rq); @@ -5081,6 +5083,12 @@ err_rq: err_unpin: intel_context_unpin(ce); err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); intel_context_put(ce); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 8624f5d2a1f3..3540ba9bd459 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine, vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; + err = PTR_ERR(vma); + goto err_put; } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) { - i915_vma_put(vma); - return ERR_PTR(err); - } + if (err) + goto err_unlock; + + i915_vma_lock(vma); base = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(base)) { - i915_gem_object_put(obj); - return ERR_CAST(base); + err = PTR_ERR(base); + goto err_unpin; } cs = base; @@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine, *cancel = base + loop; *counter = srm ? 
memset32(base + end, 0, 1) : NULL; return vma; + +err_unpin: + i915_vma_unpin(vma); +err_unlock: + i915_vma_unlock(vma); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); } static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) @@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg) goto err_vma; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (!err) err = i915_vma_move_to_active(vma, rq, 0); @@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg) err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); - i915_vma_unlock(vma); i915_request_add(rq); if (err) goto err_vma; @@ -700,7 +706,7 @@ int live_rps_frequency_cs(void *arg) f = act; /* may skip ahead [pcu granularity] */ } - err = -EINVAL; + err = -EINTR; /* ignore error, continue on with test */ } err_vma: @@ -708,6 +714,7 @@ err_vma: i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); + i915_vma_unlock(vma); i915_vma_put(vma); st_engine_heartbeat_enable(engine); @@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg) goto err_vma; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (!err) err = i915_vma_move_to_active(vma, rq, 0); @@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg) err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); - i915_vma_unlock(vma); i915_request_add(rq); if (err) goto err_vma; @@ -841,7 +846,7 @@ int live_rps_frequency_srm(void *arg) f = act; /* may skip ahead [pcu granularity] */ } - err = -EINVAL; + err = -EINTR; /* ignore error, continue on with test */ } err_vma: @@ -849,6 +854,7 @@ err_vma: i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); + i915_vma_unlock(vma); i915_vma_put(vma); st_engine_heartbeat_enable(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index fb5b7d3498a6..19c2cb166e7c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = intel_timeline_create(state->gt, NULL); + tl = intel_timeline_create(state->gt); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -158,7 +158,7 @@ out: __mock_hwsp_record(&state, na, NULL); kfree(state.history); err_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - err = intel_timeline_pin(tl); + err = intel_timeline_pin(tl, NULL); if (err) { rq = ERR_PTR(err); goto out; @@ -487,11 +487,11 @@ checked_intel_timeline_create(struct intel_gt *gt) { struct intel_timeline *tl; - tl = intel_timeline_create(gt, NULL); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) return tl; - if (*tl->hwsp_seqno != tl->seqno) { + if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", *tl->hwsp_seqno, tl->seqno); intel_timeline_put(tl); @@ -561,9 +561,9 @@ static int live_hwsp_engine(void *arg) for (n = 0; n < count; n++) { struct intel_timeline *tl = timelines[n]; - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", - n, tl->hwsp_offset, *tl->hwsp_seqno); + if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 
0x%x\n", + n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -633,9 +633,9 @@ out: for (n = 0; n < count; n++) { struct intel_timeline *tl = timelines[n]; - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", - n, tl->hwsp_offset, *tl->hwsp_seqno); + if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", + n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - tl = intel_timeline_create(gt, NULL); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) return PTR_ERR(tl); if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; - err = intel_timeline_pin(tl); + err = intel_timeline_pin(tl, NULL); if (err) goto out_free; @@ -733,7 +733,8 @@ static int live_hwsp_wrap(void *arg) goto out; } - if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] || + READ_ONCE(*hwsp_seqno[1]) != seqno[1]) { pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", *hwsp_seqno[0], *hwsp_seqno[1], seqno[0], seqno[1]); @@ -966,9 +967,10 @@ static int live_hwsp_recycle(void *arg) break; } - if (*tl->hwsp_seqno != count) { - pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n", - count, *tl->hwsp_seqno); + if (READ_ONCE(*tl->hwsp_seqno) != count) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n", + count, tl->fence_context, + tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index febc9e6692ba..61a0532d0f3d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx, return PTR_ERR(results); err = 0; - i915_gem_object_lock(results); + i915_gem_object_lock(results, NULL); intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 861657897c0f..942c7c187adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - ret = i915_ggtt_pin(vma, 0, flags); + ret = i915_ggtt_pin(vma, NULL, 0, flags); if (ret) { vma = ERR_PTR(ret); goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 59b27aba15c6..80e8b6c3bc8c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -51,8 +51,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Note that RKL uses the same firmware as TGL. 
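Several selftest hunks above switch bare *tl->hwsp_seqno dereferences to READ_ONCE(): the HWSP is written by the GPU, so every poll must reload the value from memory rather than let the compiler reuse a stale read. A minimal illustrative sketch of the idiom (the helper name is made up; the wrap-safe comparison follows the driver's usual seqno convention):

static bool example_hwsp_seqno_passed(const u32 *hwsp_seqno, u32 expected)
{
        /* reload from memory on every poll; the GPU updates it behind us */
        u32 seen = READ_ONCE(*hwsp_seqno);

        /* wrap-safe "seen >= expected" comparison */
        return (s32)(seen - expected) >= 0;
}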
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index f1940939260a..16b582cb97ed 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -936,7 +936,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EFAULT; } - if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); return -EBADRQC; @@ -976,7 +976,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, * inhibit context will restore with correct values */ if (IS_GEN(s->engine->i915, 9) && - intel_gvt_mmio_is_in_ctx(gvt, offset) && + intel_gvt_mmio_is_sr_in_ctx(gvt, offset) && !strncmp(cmd, "lri", 3)) { intel_gvt_hypervisor_read_gpa(s->vgpu, s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4); @@ -992,8 +992,6 @@ static int cmd_reg_handler(struct parser_exec_state *s, } } - /* TODO: Update the global mask if this MMIO is a masked-MMIO */ - intel_gvt_mmio_set_cmd_accessed(gvt, offset); return 0; } @@ -1923,6 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (ret) goto err_unmap; + i915_gem_object_unlock(bb->obj); INIT_LIST_HEAD(&bb->list); list_add(&bb->list, &s->workload->shadow_bb); @@ -2982,7 +2981,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto put_obj; } - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); ret = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (ret) { diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index ff7f2515a6fe..9831361f181e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -256,11 +256,11 @@ struct intel_gvt_mmio { /* This reg has been accessed by a VM */ #define F_ACCESSED (1 << 4) /* This reg has been accessed through GPU commands */ -#define F_CMD_ACCESSED (1 << 5) -/* This reg could be accessed by unaligned address */ #define F_UNALIGN (1 << 6) -/* This reg is saved/restored in context */ -#define F_IN_CTX (1 << 7) +/* This reg is in GVT's mmio save-restor list and in hardware + * logical context image + */ +#define F_SR_IN_CTX (1 << 7) struct gvt_mmio_block *mmio_block; unsigned int num_mmio_block; @@ -597,39 +597,42 @@ static inline void intel_gvt_mmio_set_accessed( } /** - * intel_gvt_mmio_is_cmd_accessed - mark a MMIO could be accessed by command + * intel_gvt_mmio_is_cmd_accessible - if a MMIO could be accessed by command * @gvt: a GVT device * @offset: register offset * + * Returns: + * True if an MMIO is able to be accessed by GPU commands */ -static inline bool intel_gvt_mmio_is_cmd_access( +static inline bool intel_gvt_mmio_is_cmd_accessible( struct intel_gvt *gvt, unsigned int offset) { return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS; } /** - * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned + * intel_gvt_mmio_set_cmd_accessible - + * 
mark a MMIO could be accessible by command * @gvt: a GVT device * @offset: register offset * */ -static inline bool intel_gvt_mmio_is_unalign( +static inline void intel_gvt_mmio_set_cmd_accessible( struct intel_gvt *gvt, unsigned int offset) { - return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN; + gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESS; } /** - * intel_gvt_mmio_set_cmd_accessed - mark a MMIO has been accessed by command + * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned * @gvt: a GVT device * @offset: register offset * */ -static inline void intel_gvt_mmio_set_cmd_accessed( +static inline bool intel_gvt_mmio_is_unalign( struct intel_gvt *gvt, unsigned int offset) { - gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESSED; + return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN; } /** @@ -648,30 +651,33 @@ static inline bool intel_gvt_mmio_has_mode_mask( } /** - * intel_gvt_mmio_is_in_ctx - check if a MMIO has in-ctx mask + * intel_gvt_mmio_is_sr_in_ctx - + * check if an MMIO has F_SR_IN_CTX mask * @gvt: a GVT device * @offset: register offset * * Returns: - * True if a MMIO has a in-context mask, false if it isn't. + * True if an MMIO has an F_SR_IN_CTX mask, false if it isn't. * */ -static inline bool intel_gvt_mmio_is_in_ctx( +static inline bool intel_gvt_mmio_is_sr_in_ctx( struct intel_gvt *gvt, unsigned int offset) { - return gvt->mmio.mmio_attribute[offset >> 2] & F_IN_CTX; + return gvt->mmio.mmio_attribute[offset >> 2] & F_SR_IN_CTX; } /** - * intel_gvt_mmio_set_in_ctx - mask a MMIO in logical context + * intel_gvt_mmio_set_sr_in_ctx - + * mask an MMIO in GVT's mmio save-restore list and also + * in hardware logical context image * @gvt: a GVT device * @offset: register offset * */ -static inline void intel_gvt_mmio_set_in_ctx( +static inline void intel_gvt_mmio_set_sr_in_ctx( struct intel_gvt *gvt, unsigned int offset) { - gvt->mmio.mmio_attribute[offset >> 2] |= F_IN_CTX; + gvt->mmio.mmio_attribute[offset >> 2] |= F_SR_IN_CTX; } void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 05f3bc98d242..3be37e6fe33d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1892,7 +1892,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->gt->i915; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); @@ -1900,7 +1900,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, 0, NULL, NULL); + MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL, gamw_echo_dev_rw_ia_write); @@ -1927,11 +1928,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); - MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, 0, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, 0, NULL, NULL); + 
MMIO_RING_DFH(RING_CTL, D_ALL, 0, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, 0, mmio_read_from_hw, NULL); + MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) _MMIO((base) + 0x29c) @@ -2686,7 +2687,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_RING_GM(RING_BBADDR, D_ALL, NULL, NULL); MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2771,7 +2772,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, 0, mmio_read_from_hw, NULL); #define RING_REG(base) _MMIO((base) + 0xd0) @@ -2785,7 +2786,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) _MMIO((base) + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG @@ -2820,7 +2821,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); + MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2921,7 +2922,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(MMCD_MISC_CTRL, D_SKL_PLUS, NULL, NULL); + MMIO_DFH(MMCD_MISC_CTRL, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DH(CHICKEN_PAR1_1, D_SKL_PLUS, NULL, NULL); MMIO_D(DC_STATE_EN, D_SKL_PLUS); MMIO_D(DC_STATE_DEBUG, D_SKL_PLUS); @@ -3137,7 +3138,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL); + MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS); return 0; @@ -3357,7 +3358,10 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt) gvt->mmio.mmio_attribute = NULL; } -/* Special MMIO blocks. */ +/* Special MMIO blocks. registers in MMIO block ranges should not be command + * accessible (should have no F_CMD_ACCESS flag). 
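The accessor churn above all operates on the same underlying scheme: GVT keeps one attribute byte per 32-bit register, indexed by offset >> 2, and flags such as F_CMD_ACCESS, F_UNALIGN and F_SR_IN_CTX are individual bits OR'd into that byte. A generic sketch of the pattern the renamed helpers follow (names are illustrative only):

static inline bool example_mmio_test_flag(struct intel_gvt *gvt,
                                          unsigned int offset,
                                          unsigned int flag)
{
        return gvt->mmio.mmio_attribute[offset >> 2] & flag;
}

static inline void example_mmio_set_flag(struct intel_gvt *gvt,
                                         unsigned int offset,
                                         unsigned int flag)
{
        gvt->mmio.mmio_attribute[offset >> 2] |= flag;
}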
+ * otherwise, need to update cmd_reg_handler in cmd_parser.c + */ static struct gvt_mmio_block mmio_blocks[] = { {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 291993615af9..b6811f6a230d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -251,6 +251,9 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; + /* uc reset hw expect GS_MIA_IN_RESET */ + vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET; + if (IS_BROXTON(vgpu->gvt->gt->i915)) { vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 86a60bdf0818..afe574d6b3b5 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -595,7 +595,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) i915_mmio_reg_valid(mmio->reg); mmio++) { if (mmio->in_context) { gvt->engine_mmio_list.ctx_mmio_count[mmio->id]++; - intel_gvt_mmio_set_in_ctx(gvt, mmio->reg.reg); + intel_gvt_mmio_set_sr_in_ctx(gvt, mmio->reg.reg); } } } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3c3b9842bbbd..1570eb8aa978 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -403,6 +403,14 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } +static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr) +{ + struct scatterlist *sg = pd->pt.base->mm.pages->sgl; + + /* This is not a good idea */ + sg->dma_address = addr; +} + static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, struct intel_context *ce) { @@ -411,7 +419,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { - px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; + set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]); } else { for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { struct i915_page_directory * const pd = @@ -421,7 +429,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, shadow ppgtt. 
*/ if (!pd) break; - px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; + + set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]); } } } @@ -1240,13 +1249,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, int i; if (i915_vm_is_4lvl(&ppgtt->vm)) { - px_dma(ppgtt->pd) = s->i915_context_pml4; + set_dma_address(ppgtt->pd, s->i915_context_pml4); } else { for (i = 0; i < GEN8_3LVL_PDPES; i++) { struct i915_page_directory * const pd = i915_pd_entry(ppgtt->pd, i); - px_dma(pd) = s->i915_context_pdps[i]; + set_dma_address(pd, s->i915_context_pdps[i]); } } } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 839bd53df6e9..10a865f3dc09 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -28,12 +28,14 @@ static struct i915_global_active { } global; struct active_node { + struct rb_node node; struct i915_active_fence base; struct i915_active *ref; - struct rb_node node; u64 timeline; }; +#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node) + static inline struct active_node * node_from_active(struct i915_active_fence *active) { @@ -128,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { } static void __active_retire(struct i915_active *ref) { + struct rb_root root = RB_ROOT; struct active_node *it, *n; - struct rb_root root; unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -141,9 +143,25 @@ __active_retire(struct i915_active *ref) GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; + /* Even if we have not used the cache, we may still have a barrier */ + if (!ref->cache) + ref->cache = fetch_node(ref->tree.rb_node); + + /* Keep the MRU cached node for reuse */ + if (ref->cache) { + /* Discard all other nodes in the tree */ + rb_erase(&ref->cache->node, &ref->tree); + root = ref->tree; + + /* Rebuild the tree with only the cached node */ + rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); + rb_insert_color(&ref->cache->node, &ref->tree); + GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); + + /* Make the cached node available for reuse with any timeline */ + if (IS_ENABLED(CONFIG_64BIT)) + ref->cache->timeline = 0; /* needs cmpxchg(u64) */ + } spin_unlock_irqrestore(&ref->tree_lock, flags); @@ -154,6 +172,7 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); @@ -216,12 +235,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) active_retire(container_of(cb, struct i915_active, excl.cb)); } -static struct i915_active_fence * -active_instance(struct i915_active *ref, struct intel_timeline *tl) +static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) { - struct active_node *node, *prealloc; - struct rb_node **p, *parent; - u64 idx = tl->fence_context; + struct active_node *it; + + GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */ /* * We track the most recently used timeline to skip a rbtree search @@ -230,8 +248,59 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) * after the previous activity has been retired, or if it matches the * current timeline. 
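The rework of __active_retire() above keeps the most-recently-used node as a preallocated cache instead of freeing the whole tree. A reduced sketch of that idiom, with the spinlock, debug checks and barrier promotion elided, using the same active_node layout as above:

static void example_retire_keep_mru(struct i915_active *ref)
{
        struct rb_root doomed = RB_ROOT;
        struct active_node *it, *n;

        if (ref->cache) {
                /* detach the cached node; everything else is doomed */
                rb_erase(&ref->cache->node, &ref->tree);
                doomed = ref->tree;

                /* rebuild the tree containing only the cached node */
                ref->tree = RB_ROOT;
                rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
                rb_insert_color(&ref->cache->node, &ref->tree);

                if (IS_ENABLED(CONFIG_64BIT))
                        ref->cache->timeline = 0; /* reusable by any timeline */
        } else {
                doomed = ref->tree;
                ref->tree = RB_ROOT;
        }

        /* free the discarded timeline nodes in one postorder pass */
        rbtree_postorder_for_each_entry_safe(it, n, &doomed, node)
                kmem_cache_free(global.slab_cache, it);
}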
*/ - node = READ_ONCE(ref->cache); - if (node && node->timeline == idx) + it = READ_ONCE(ref->cache); + if (it) { + u64 cached = READ_ONCE(it->timeline); + + /* Once claimed, this slot will only belong to this idx */ + if (cached == idx) + return it; + +#ifdef CONFIG_64BIT /* for cmpxchg(u64) */ + /* + * An unclaimed cache [.timeline=0] can only be claimed once. + * + * If the value is already non-zero, some other thread has + * claimed the cache and we know that is does not match our + * idx. If, and only if, the timeline is currently zero is it + * worth competing to claim it atomically for ourselves (for + * only the winner of that race will cmpxchg return the old + * value of 0). + */ + if (!cached && !cmpxchg(&it->timeline, 0, idx)) + return it; +#endif + } + + BUILD_BUG_ON(offsetof(typeof(*it), node)); + + /* While active, the tree can only be built; not destroyed */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + it = fetch_node(ref->tree.rb_node); + while (it) { + if (it->timeline < idx) { + it = fetch_node(it->node.rb_right); + } else if (it->timeline > idx) { + it = fetch_node(it->node.rb_left); + } else { + WRITE_ONCE(ref->cache, it); + break; + } + } + + /* NB: If the tree rotated beneath us, we may miss our target. */ + return it; +} + +static struct i915_active_fence * +active_instance(struct i915_active *ref, u64 idx) +{ + struct active_node *node, *prealloc; + struct rb_node **p, *parent; + + node = __active_lookup(ref, idx); + if (likely(node)) return &node->base; /* Preallocate a replacement, just in case */ @@ -268,10 +337,9 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) rb_insert_color(&node->node, &ref->tree); out: - ref->cache = node; + WRITE_ONCE(ref->cache, node); spin_unlock_irq(&ref->tree_lock); - BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; } @@ -353,69 +421,116 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node) return ____active_del_barrier(ref, node, barrier_to_engine(node)); } -int i915_active_ref(struct i915_active *ref, - struct intel_timeline *tl, - struct dma_fence *fence) +static bool +replace_barrier(struct i915_active *ref, struct i915_active_fence *active) +{ + if (!is_barrier(active)) /* proto-node used by our idle barrier? */ + return false; + + /* + * This request is on the kernel_context timeline, and so + * we can use it to substitute for the pending idle-barrer + * request that we want to emit on the kernel_context. + */ + __active_del_barrier(ref, node_from_active(active)); + return true; +} + +int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) { struct i915_active_fence *active; int err; - lockdep_assert_held(&tl->mutex); - /* Prevent reaping in case we malloc/wait while building the tree */ err = i915_active_acquire(ref); if (err) return err; - active = active_instance(ref, tl); + active = active_instance(ref, idx); if (!active) { err = -ENOMEM; goto out; } - if (is_barrier(active)) { /* proto-node used by our idle barrier */ - /* - * This request is on the kernel_context timeline, and so - * we can use it to substitute for the pending idle-barrer - * request that we want to emit on the kernel_context. 
- */ - __active_del_barrier(ref, node_from_active(active)); + if (replace_barrier(ref, active)) { RCU_INIT_POINTER(active->fence, NULL); atomic_dec(&ref->count); } if (!__i915_active_fence_set(active, fence)) - atomic_inc(&ref->count); + __i915_active_acquire(ref); out: i915_active_release(ref); return err; } -struct dma_fence * -i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +static struct dma_fence * +__i915_active_set_fence(struct i915_active *ref, + struct i915_active_fence *active, + struct dma_fence *fence) { struct dma_fence *prev; - /* We expect the caller to manage the exclusive timeline ordering */ - GEM_BUG_ON(i915_active_is_idle(ref)); + if (replace_barrier(ref, active)) { + RCU_INIT_POINTER(active->fence, fence); + return NULL; + } rcu_read_lock(); - prev = __i915_active_fence_set(&ref->excl, f); + prev = __i915_active_fence_set(active, fence); if (prev) prev = dma_fence_get_rcu(prev); else - atomic_inc(&ref->count); + __i915_active_acquire(ref); rcu_read_unlock(); return prev; } +static struct i915_active_fence * +__active_fence(struct i915_active *ref, u64 idx) +{ + struct active_node *it; + + it = __active_lookup(ref, idx); + if (unlikely(!it)) { /* Contention with parallel tree builders! */ + spin_lock_irq(&ref->tree_lock); + it = __active_lookup(ref, idx); + spin_unlock_irq(&ref->tree_lock); + } + GEM_BUG_ON(!it); /* slot must be preallocated */ + + return &it->base; +} + +struct dma_fence * +__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) +{ + /* Only valid while active, see i915_active_acquire_for_context() */ + return __i915_active_set_fence(ref, __active_fence(ref, idx), fence); +} + +struct dma_fence * +i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +{ + /* We expect the caller to manage the exclusive timeline ordering */ + return __i915_active_set_fence(ref, &ref->excl, f); +} + bool i915_active_acquire_if_busy(struct i915_active *ref) { debug_active_assert(ref); return atomic_add_unless(&ref->count, 1, 0); } +static void __i915_active_activate(struct i915_active *ref) +{ + spin_lock_irq(&ref->tree_lock); /* __active_retire() */ + if (!atomic_fetch_inc(&ref->count)) + debug_active_activate(ref); + spin_unlock_irq(&ref->tree_lock); +} + int i915_active_acquire(struct i915_active *ref) { int err; @@ -423,19 +538,19 @@ int i915_active_acquire(struct i915_active *ref) if (i915_active_acquire_if_busy(ref)) return 0; + if (!ref->active) { + __i915_active_activate(ref); + return 0; + } + err = mutex_lock_interruptible(&ref->mutex); if (err) return err; if (likely(!i915_active_acquire_if_busy(ref))) { - if (ref->active) - err = ref->active(ref); - if (!err) { - spin_lock_irq(&ref->tree_lock); /* __active_retire() */ - debug_active_activate(ref); - atomic_inc(&ref->count); - spin_unlock_irq(&ref->tree_lock); - } + err = ref->active(ref); + if (!err) + __i915_active_activate(ref); } mutex_unlock(&ref->mutex); @@ -443,6 +558,24 @@ int i915_active_acquire(struct i915_active *ref) return err; } +int i915_active_acquire_for_context(struct i915_active *ref, u64 idx) +{ + struct i915_active_fence *active; + int err; + + err = i915_active_acquire(ref); + if (err) + return err; + + active = active_instance(ref, idx); + if (!active) { + i915_active_release(ref); + return -ENOMEM; + } + + return 0; /* return with active ref */ +} + void i915_active_release(struct i915_active *ref) { debug_active_assert(ref); @@ -651,16 +784,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence, return 
await_active(ref, flags, sw_await_fence, fence, fence); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); GEM_BUG_ON(atomic_read(&ref->count)); GEM_BUG_ON(work_pending(&ref->work)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); + + if (ref->cache) + kmem_cache_free(global.slab_cache, ref->cache); } -#endif static inline bool is_idle_barrier(struct active_node *node, u64 idx) { @@ -674,7 +807,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) if (RB_EMPTY_ROOT(&ref->tree)) return NULL; - spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); /* @@ -700,9 +832,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) prev = p; if (node->timeline < idx) - p = p->rb_right; + p = READ_ONCE(p->rb_right); else - p = p->rb_left; + p = READ_ONCE(p->rb_left); } /* @@ -739,14 +871,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) goto match; } - spin_unlock_irq(&ref->tree_lock); - return NULL; match: + spin_lock_irq(&ref->tree_lock); rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ if (p == &ref->cache->node) - ref->cache = NULL; + WRITE_ONCE(ref->cache, NULL); spin_unlock_irq(&ref->tree_lock); return rb_entry(p, struct active_node, node); @@ -758,7 +889,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, intel_engine_mask_t tmp, mask = engine->mask; struct llist_node *first = NULL, *last = NULL; struct intel_gt *gt = engine->gt; - int err; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -778,13 +908,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct llist_node *prev = first; struct active_node *node; + rcu_read_lock(); node = reuse_idle_barrier(ref, idx); + rcu_read_unlock(); if (!node) { node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - if (!node) { - err = ENOMEM; + if (!node) goto unwind; - } RCU_INIT_POINTER(node->base.fence, NULL); node->base.cb.func = node_retire; @@ -804,7 +934,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, */ RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); node->base.cb.node.prev = (void *)engine; - atomic_inc(&ref->count); + __i915_active_acquire(ref); } GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); @@ -832,7 +962,7 @@ unwind: kmem_cache_free(global.slab_cache, node); } - return err; + return -ENOMEM; } void i915_active_acquire_barrier(struct i915_active *ref) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index cf4058150966..fb165d3f01cf 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -163,14 +163,16 @@ void __i915_active_init(struct i915_active *ref, __i915_active_init(ref, active, retire, &__mkey, &__wkey); \ } while (0) -int i915_active_ref(struct i915_active *ref, - struct intel_timeline *tl, - struct dma_fence *fence); +struct dma_fence * +__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); +int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); static inline int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { - return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); + return i915_active_ref(ref, + i915_request_timeline(rq)->fence_context, + &rq->fence); } struct dma_fence * @@ -198,7 +200,9 @@ int i915_request_await_active(struct i915_request *rq, #define 
I915_ACTIVE_AWAIT_BARRIER BIT(2) int i915_active_acquire(struct i915_active *ref); +int i915_active_acquire_for_context(struct i915_active *ref, u64 idx); bool i915_active_acquire_if_busy(struct i915_active *ref); + void i915_active_release(struct i915_active *ref); static inline void __i915_active_acquire(struct i915_active *ref) @@ -213,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref) return !atomic_read(&ref->count); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref); -#else -static inline void i915_active_fini(struct i915_active *ref) { } -#endif int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine); @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); struct i915_active *i915_active_get(struct i915_active *ref); void i915_active_put(struct i915_active *ref); +static inline int __i915_request_await_exclusive(struct i915_request *rq, + struct i915_active *active) +{ + struct dma_fence *fence; + int err = 0; + + fence = i915_active_fence_get(&active->excl); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + return err; +} + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 5ac4a999f05a..e88970256e8e 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1136,7 +1136,7 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, - u32 offset, u32 length) + unsigned long offset, unsigned long length) { bool needs_clflush; void *dst, *src; @@ -1166,8 +1166,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { + unsigned long x, n; void *ptr; - int x, n; /* * We can avoid clflushing partial cachelines before the write @@ -1184,7 +1184,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, ptr = dst; x = offset_in_page(offset); for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min_t(int, length, PAGE_SIZE - x); + int len = min(length, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1414,8 +1414,8 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) */ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, - u32 batch_offset, - u32 batch_length, + unsigned long batch_offset, + unsigned long batch_length, struct i915_vma *shadow, bool trampoline) { diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 784219962193..ea469168cd44 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -326,6 +326,7 @@ static void print_context_stats(struct seq_file *m, } i915_gem_context_unlock_engines(ctx); + mutex_lock(&ctx->mutex); if (!IS_ERR_OR_NULL(ctx->file_priv)) { struct file_stats stats = { .vm = rcu_access_pointer(ctx->vm), @@ -346,6 +347,7 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + mutex_unlock(&ctx->mutex); spin_lock(&i915->gem.contexts.lock); list_safe_reset_next(ctx, cn, link); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5fd5af4bc855..acc32066cec3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -58,7 +58,6 @@ 
#include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" -#include "display/intel_psr.h" #include "display/intel_sprite.h" #include "display/intel_vga.h" @@ -216,125 +215,6 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -/* part #1: call before irq install */ -static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915) -{ - int ret; - - if (i915_inject_probe_failure(i915)) - return -ENODEV; - - if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { - ret = drm_vblank_init(&i915->drm, - INTEL_NUM_PIPES(i915)); - if (ret) - return ret; - } - - intel_bios_init(i915); - - ret = intel_vga_register(i915); - if (ret) - goto cleanup_bios; - - intel_power_domains_init_hw(i915, false); - - intel_csr_ucode_init(i915); - - ret = intel_modeset_init_noirq(i915); - if (ret) - goto cleanup_vga_client_pw_domain_csr; - - return 0; - -cleanup_vga_client_pw_domain_csr: - intel_csr_ucode_fini(i915); - intel_power_domains_driver_remove(i915); - intel_vga_unregister(i915); -cleanup_bios: - intel_bios_driver_remove(i915); - return ret; -} - -/* part #2: call after irq install */ -static int i915_driver_modeset_probe(struct drm_i915_private *i915) -{ - int ret; - - /* Important: The output setup functions called by modeset_init need - * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(i915); - if (ret) - goto out; - - ret = i915_gem_init(i915); - if (ret) - goto cleanup_modeset; - - intel_overlay_setup(i915); - - if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915)) - return 0; - - ret = intel_fbdev_init(&i915->drm); - if (ret) - goto cleanup_gem; - - /* Only enable hotplug handling once the fbdev is fully set up. */ - intel_hpd_init(i915); - - intel_init_ipc(i915); - - intel_psr_set_force_mode_changed(i915->psr.dp); - - return 0; - -cleanup_gem: - i915_gem_suspend(i915); - i915_gem_driver_remove(i915); - i915_gem_driver_release(i915); -cleanup_modeset: - /* FIXME */ - intel_modeset_driver_remove(i915); - intel_irq_uninstall(i915); - intel_modeset_driver_remove_noirq(i915); -out: - return ret; -} - -/* part #1: call before irq uninstall */ -static void i915_driver_modeset_remove(struct drm_i915_private *i915) -{ - intel_modeset_driver_remove(i915); -} - -/* part #2: call after irq uninstall */ -static void i915_driver_modeset_remove_noirq(struct drm_i915_private *i915) -{ - intel_csr_ucode_fini(i915); - - intel_power_domains_driver_remove(i915); - - intel_vga_unregister(i915); - - intel_bios_driver_remove(i915); -} - -static void intel_init_dpio(struct drm_i915_private *dev_priv) -{ - /* - * IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C), - * CHV x1 PHY (DP/HDMI D) - * IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C) - */ - if (IS_CHERRYVIEW(dev_priv)) { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2; - DPIO_PHY_IOSF_PORT(DPIO_PHY1) = IOSF_PORT_DPIO; - } else if (IS_VALLEYVIEW(dev_priv)) { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO; - } -} - static int i915_workqueues_init(struct drm_i915_private *dev_priv) { /* @@ -392,7 +272,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); - pre |= IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0); + pre |= IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_A0); pre |= IS_GLK_REVID(dev_priv, 0, GLK_REVID_A2); if (pre) { @@ -463,7 +343,6 @@ static int 
i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_detect_pch(dev_priv); intel_pm_setup(dev_priv); - intel_init_dpio(dev_priv); ret = intel_power_domains_init(dev_priv); if (ret < 0) goto err_gem; @@ -798,7 +677,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) drm_err(&dev_priv->drm, "Failed to register driver for userspace access!\n"); - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { + if (HAS_DISPLAY(dev_priv)) { /* Must be done after probing outputs */ intel_opregion_register(dev_priv); acpi_video_register(); @@ -821,7 +700,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * We need to coordinate the hotplugs with the asynchronous fbdev * configuration, for which we use the fbdev->async_cookie. */ - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) + if (HAS_DISPLAY(dev_priv)) drm_kms_helper_poll_init(dev); intel_power_domains_enable(dev_priv); @@ -988,7 +867,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto out_cleanup_mmio; - ret = i915_driver_modeset_probe_noirq(i915); + ret = intel_modeset_init_noirq(i915); if (ret < 0) goto out_cleanup_hw; @@ -996,10 +875,18 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset; - ret = i915_driver_modeset_probe(i915); - if (ret < 0) + ret = intel_modeset_init_nogem(i915); + if (ret) goto out_cleanup_irq; + ret = i915_gem_init(i915); + if (ret) + goto out_cleanup_modeset2; + + ret = intel_modeset_init(i915); + if (ret) + goto out_cleanup_gem; + i915_driver_register(i915); enable_rpm_wakeref_asserts(&i915->runtime_pm); @@ -1010,10 +897,20 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +out_cleanup_gem: + i915_gem_suspend(i915); + i915_gem_driver_remove(i915); + i915_gem_driver_release(i915); +out_cleanup_modeset2: + /* FIXME clean up the error path */ + intel_modeset_driver_remove(i915); + intel_irq_uninstall(i915); + intel_modeset_driver_remove_noirq(i915); + goto out_cleanup_modeset; out_cleanup_irq: intel_irq_uninstall(i915); out_cleanup_modeset: - i915_driver_modeset_remove_noirq(i915); + intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); intel_memory_regions_driver_release(i915); @@ -1045,7 +942,7 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_gvt_driver_remove(i915); - i915_driver_modeset_remove(i915); + intel_modeset_driver_remove(i915); intel_irq_uninstall(i915); @@ -1054,7 +951,7 @@ void i915_driver_remove(struct drm_i915_private *i915) i915_reset_error_state(i915); i915_gem_driver_remove(i915); - i915_driver_modeset_remove_noirq(i915); + intel_modeset_driver_remove_nogem(i915); i915_driver_hw_remove(i915); @@ -1075,6 +972,7 @@ static void i915_driver_release(struct drm_device *dev) intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); + i915_gem_drain_freed_objects(dev_priv); i915_driver_mmio_release(dev_priv); @@ -1119,7 +1017,6 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv = file->driver_priv; i915_gem_context_close(file); - i915_gem_release(dev, file); kfree_rcu(file_priv, rcu); @@ -1846,7 +1743,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = 
i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e4f7f6518945..eef9a821c49c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,18 +108,11 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200715" -#define DRIVER_TIMESTAMP 1594811881 +#define DRIVER_DATE "20200917" +#define DRIVER_TIMESTAMP 1600375437 struct drm_i915_gem_object; -/* - * The code assumes that the hpd_pins below have consecutive values and - * starting with HPD_PORT_A, the HPD pin associated with any port can be - * retrieved by adding the corresponding port (or phy) enum value to - * HPD_PORT_A in most cases. For example: - * HPD_PORT_C = HPD_PORT_A + PHY_C - PHY_A - */ enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -131,10 +124,12 @@ enum hpd_pin { HPD_PORT_C, HPD_PORT_D, HPD_PORT_E, - HPD_PORT_F, - HPD_PORT_G, - HPD_PORT_H, - HPD_PORT_I, + HPD_PORT_TC1, + HPD_PORT_TC2, + HPD_PORT_TC3, + HPD_PORT_TC4, + HPD_PORT_TC5, + HPD_PORT_TC6, HPD_NUM_PINS }; @@ -203,11 +198,6 @@ struct drm_i915_file_private { struct rcu_head rcu; }; - struct { - spinlock_t lock; - struct list_head request_list; - } mm; - struct xarray context_xa; struct xarray vm_xa; @@ -506,6 +496,7 @@ struct i915_psr { bool link_standby; bool colorimetry_support; bool psr2_enabled; + bool psr2_sel_fetch_enabled; u8 sink_sync_latency; ktime_t last_entry_attempt; ktime_t last_exit; @@ -541,13 +532,9 @@ struct intel_gmbus { struct i915_suspend_saved_registers { u32 saveDSPARB; - u32 saveFBC_CONTROL; - u32 saveCACHE_MODE_0; - u32 saveMI_ARB_STATE; u32 saveSWF0[16]; u32 saveSWF1[16]; u32 saveSWF3[3]; - u32 savePCH_PORT_HOTPLUG; u16 saveGCDGMBUS; }; @@ -592,11 +579,6 @@ struct i915_gem_mm { atomic_t free_count; /** - * Small stash of WC pages - */ - struct pagestash wc_stash; - - /** * tmpfs instance used for shmem backed objects */ struct vfsmount *gemfs; @@ -1029,8 +1011,6 @@ struct drm_i915_private { */ u8 active_pipes; - int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; - struct i915_wa_list gt_wa_list; struct i915_frontbuffer_tracking fb_tracking; @@ -1045,6 +1025,14 @@ struct drm_i915_private { struct intel_l3_parity l3_parity; /* + * HTI (aka HDPORT) state read during initial hw readout. Most + * platforms don't have HTI, so this will just stay 0. Those that do + * will use this later to figure out which PLLs and PHYs are unavailable + * for driver usage. + */ + u32 hti_state; + + /* * edram size in MB. * Cannot be determined by PCIID. You must always read a register. 
*/ @@ -1489,6 +1477,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) +#define IS_TGL_U(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULT) + +#define IS_TGL_Y(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) + #define SKL_REVID_A0 0x0 #define SKL_REVID_B0 0x1 #define SKL_REVID_C0 0x2 @@ -1509,14 +1503,34 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_BXT_REVID(dev_priv, since, until) \ (IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until)) -#define KBL_REVID_A0 0x0 -#define KBL_REVID_B0 0x1 -#define KBL_REVID_C0 0x2 -#define KBL_REVID_D0 0x3 -#define KBL_REVID_E0 0x4 +enum { + KBL_REVID_A0, + KBL_REVID_B0, + KBL_REVID_B1, + KBL_REVID_C0, + KBL_REVID_D0, + KBL_REVID_D1, + KBL_REVID_E0, + KBL_REVID_F0, + KBL_REVID_G0, +}; + +struct i915_rev_steppings { + u8 gt_stepping; + u8 disp_stepping; +}; + +/* Defined in intel_workarounds.c */ +extern const struct i915_rev_steppings kbl_revids[]; -#define IS_KBL_REVID(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define IS_KBL_GT_REVID(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && \ + kbl_revids[INTEL_REVID(dev_priv)].gt_stepping >= since && \ + kbl_revids[INTEL_REVID(dev_priv)].gt_stepping <= until) +#define IS_KBL_DISP_REVID(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && \ + kbl_revids[INTEL_REVID(dev_priv)].disp_stepping >= since && \ + kbl_revids[INTEL_REVID(dev_priv)].disp_stepping <= until) #define GLK_REVID_A0 0x0 #define GLK_REVID_A1 0x1 @@ -1547,12 +1561,41 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_EHL_REVID(p, since, until) \ (IS_ELKHARTLAKE(p) && IS_REVID(p, since, until)) -#define TGL_REVID_A0 0x0 -#define TGL_REVID_B0 0x1 -#define TGL_REVID_C0 0x2 +enum { + TGL_REVID_A0, + TGL_REVID_B0, + TGL_REVID_B1, + TGL_REVID_C0, + TGL_REVID_D0, +}; + +extern const struct i915_rev_steppings tgl_uy_revids[]; +extern const struct i915_rev_steppings tgl_revids[]; + +static inline const struct i915_rev_steppings * +tgl_revids_get(struct drm_i915_private *dev_priv) +{ + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) + return tgl_uy_revids; + else + return tgl_revids; +} + +#define IS_TGL_DISP_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && \ + tgl_revids_get(p)->disp_stepping >= (since) && \ + tgl_revids_get(p)->disp_stepping <= (until)) + +#define IS_TGL_UY_GT_REVID(p, since, until) \ + ((IS_TGL_U(p) || IS_TGL_Y(p)) && \ + tgl_uy_revids->gt_stepping >= (since) && \ + tgl_uy_revids->gt_stepping <= (until)) -#define IS_TGL_REVID(p, since, until) \ - (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) +#define IS_TGL_GT_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && \ + !(IS_TGL_U(p) || IS_TGL_Y(p)) && \ + tgl_revids->gt_stepping >= (since) && \ + tgl_revids->gt_stepping <= (until)) #define RKL_REVID_A0 0x0 #define RKL_REVID_B0 0x1 @@ -1665,6 +1708,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) +#define HAS_PSR2_SEL_FETCH(dev_priv) (INTEL_GEN(dev_priv) >= 12) #define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0) #define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) @@ -1790,11 +1834,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) } struct 
i915_vma * __must_check +i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags); + +static inline struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - u64 size, - u64 alignment, - u64 flags); + u64 size, u64 alignment, u64 flags) +{ + return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags); +} int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); @@ -1831,7 +1882,6 @@ void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); -void i915_gem_release(struct drm_device *dev, struct drm_file *file); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); @@ -1899,8 +1949,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, - u32 batch_offset, - u32 batch_length, + unsigned long batch_offset, + unsigned long batch_length, struct i915_vma *shadow, bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9aa3066cb75d..bb0c12975f38 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, u64 remain; int ret; - ret = i915_gem_object_prepare_read(obj, &needs_clflush); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; + ret = i915_gem_object_prepare_read(obj, &needs_clflush); + if (ret) { + i915_gem_object_unlock(obj); + return ret; + } + fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); + if (!fence) return -ENOMEM; @@ -420,7 +428,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - ret = i915_gem_object_lock_interruptible(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) goto out_unpin; @@ -619,7 +627,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - ret = i915_gem_object_lock_interruptible(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) goto out_unpin; @@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, u64 remain; int ret; - ret = i915_gem_object_prepare_write(obj, &needs_clflush); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; + ret = i915_gem_object_prepare_write(obj, &needs_clflush); + if (ret) { + i915_gem_object_unlock(obj); + return ret; + } + fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); + if (!fence) return -ENOMEM; @@ -946,11 +962,10 @@ static void discard_ggtt_vma(struct i915_vma *vma) } struct i915_vma * -i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view, - u64 size, - u64 alignment, - u64 flags) +i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt 
*ggtt = &i915->ggtt; @@ -1016,7 +1031,7 @@ new_vma: return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); @@ -1290,7 +1305,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); drm_WARN_ON(&i915->drm, i915_gem_object_set_to_cpu_domain(obj, true)); i915_gem_object_unlock(obj); @@ -1301,21 +1316,6 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) return 0; } -void i915_gem_release(struct drm_device *dev, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_request *request; - - /* Clean up our request list when the client is going away, so that - * later retire_requests won't dereference our soon-to-be-gone - * file_priv. - */ - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_link) - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); -} - int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv; @@ -1331,9 +1331,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) file_priv->dev_priv = i915; file_priv->file = file; - spin_lock_init(&file_priv->mm.lock); - INIT_LIST_HEAD(&file_priv->mm.request_list); - file_priv->bsd_engine = -1; file_priv->hang_timestamp = jiffies; @@ -1344,6 +1341,58 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + ww_acquire_init(&ww->ctx, &reservation_ww_class); + INIT_LIST_HEAD(&ww->obj_list); + ww->intr = intr; + ww->contended = NULL; +} + +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj; + + while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + } +} + +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); +} + +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) +{ + i915_gem_ww_ctx_unlock_all(ww); + WARN_ON(ww->contended); + ww_acquire_fini(&ww->ctx); +} + +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) +{ + int ret = 0; + + if (WARN_ON(!ww->contended)) + return -EINVAL; + + i915_gem_ww_ctx_unlock_all(ww); + if (ww->intr) + ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); + else + dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); + + if (!ret) + list_add_tail(&ww->contended->obj_link, &ww->obj_list); + + ww->contended = NULL; + + return ret; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f333e88a2b6e..a4cad3f154ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -116,4 +116,16 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } +struct i915_gem_ww_ctx { + struct ww_acquire_ctx ctx; + struct list_head obj_list; + bool intr; + struct drm_i915_gem_object *contended; +}; + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, 
bool intr); +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 421613219ae9..f96032c60a12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3e6cbb0d1150..a635ec8d0b94 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -311,6 +311,8 @@ static int compress_page(struct i915_vma_compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + cond_resched(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -397,6 +399,7 @@ static int compress_page(struct i915_vma_compress *c, if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; + cond_resched(); return 0; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1fa67700d8f4..759f523c6a6b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -41,6 +41,7 @@ #include "display/intel_lpe_audio.h" #include "display/intel_psr.h" +#include "gt/intel_breadcrumbs.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -131,40 +132,24 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = { }; static const u32 hpd_gen11[HPD_NUM_PINS] = { - [HPD_PORT_C] = GEN11_TC1_HOTPLUG | GEN11_TBT1_HOTPLUG, - [HPD_PORT_D] = GEN11_TC2_HOTPLUG | GEN11_TBT2_HOTPLUG, - [HPD_PORT_E] = GEN11_TC3_HOTPLUG | GEN11_TBT3_HOTPLUG, - [HPD_PORT_F] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG, -}; - -static const u32 hpd_gen12[HPD_NUM_PINS] = { - [HPD_PORT_D] = GEN11_TC1_HOTPLUG | GEN11_TBT1_HOTPLUG, - [HPD_PORT_E] = GEN11_TC2_HOTPLUG | GEN11_TBT2_HOTPLUG, - [HPD_PORT_F] = GEN11_TC3_HOTPLUG | GEN11_TBT3_HOTPLUG, - [HPD_PORT_G] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG, - [HPD_PORT_H] = GEN12_TC5_HOTPLUG | GEN12_TBT5_HOTPLUG, - [HPD_PORT_I] = GEN12_TC6_HOTPLUG | GEN12_TBT6_HOTPLUG, + [HPD_PORT_TC1] = GEN11_TC_HOTPLUG(PORT_TC1) | GEN11_TBT_HOTPLUG(PORT_TC1), + [HPD_PORT_TC2] = GEN11_TC_HOTPLUG(PORT_TC2) | GEN11_TBT_HOTPLUG(PORT_TC2), + [HPD_PORT_TC3] = GEN11_TC_HOTPLUG(PORT_TC3) | GEN11_TBT_HOTPLUG(PORT_TC3), + [HPD_PORT_TC4] = GEN11_TC_HOTPLUG(PORT_TC4) | GEN11_TBT_HOTPLUG(PORT_TC4), + [HPD_PORT_TC5] = GEN11_TC_HOTPLUG(PORT_TC5) | GEN11_TBT_HOTPLUG(PORT_TC5), + [HPD_PORT_TC6] = GEN11_TC_HOTPLUG(PORT_TC6) | GEN11_TBT_HOTPLUG(PORT_TC6), }; static const u32 hpd_icp[HPD_NUM_PINS] = { [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), - [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), - [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2), - [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3), - [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), -}; - -static const u32 hpd_tgp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), - 
[HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), [HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C), - [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1), - [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2), - [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3), - [HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4), - [HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5), - [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), + [HPD_PORT_TC1] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_TC2] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_TC3] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_TC4] = SDE_TC_HOTPLUG_ICP(PORT_TC4), + [HPD_PORT_TC5] = SDE_TC_HOTPLUG_ICP(PORT_TC5), + [HPD_PORT_TC6] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) @@ -180,9 +165,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) return; } - if (INTEL_GEN(dev_priv) >= 12) - hpd->hpd = hpd_gen12; - else if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) hpd->hpd = hpd_gen11; else if (IS_GEN9_LP(dev_priv)) hpd->hpd = hpd_bxt; @@ -196,9 +179,8 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) if (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv)) return; - if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv)) - hpd->pch_hpd = hpd_tgp; - else if (HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) + if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv) || + HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) hpd->pch_hpd = hpd_icp; else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) hpd->pch_hpd = hpd_spt; @@ -1048,33 +1030,17 @@ out: static bool gen11_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { - case HPD_PORT_C: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC1); - case HPD_PORT_D: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC2); - case HPD_PORT_E: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC3); - case HPD_PORT_F: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC4); - default: - return false; - } -} - -static bool gen12_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_D: + case HPD_PORT_TC1: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC1); - case HPD_PORT_E: + case HPD_PORT_TC2: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC2); - case HPD_PORT_F: + case HPD_PORT_TC3: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC3); - case HPD_PORT_G: + case HPD_PORT_TC4: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC4); - case HPD_PORT_H: + case HPD_PORT_TC5: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC5); - case HPD_PORT_I: + case HPD_PORT_TC6: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC6); default: return false; @@ -1112,33 +1078,17 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { - case HPD_PORT_C: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC1); - case HPD_PORT_D: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); - case HPD_PORT_E: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); - case HPD_PORT_F: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); - default: - return false; - } -} - -static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_D: + case HPD_PORT_TC1: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC1); - case HPD_PORT_E: + case HPD_PORT_TC2: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); - case HPD_PORT_F: + case HPD_PORT_TC3: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); - case HPD_PORT_G: + case HPD_PORT_TC4: return 
val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); - case HPD_PORT_H: + case HPD_PORT_TC5: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC5); - case HPD_PORT_I: + case HPD_PORT_TC6: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC6); default: return false; @@ -1892,19 +1842,16 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; - bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); if (HAS_PCH_TGP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; - tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; } else if (HAS_PCH_JSP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); - tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; } else { drm_WARN(&dev_priv->drm, !HAS_PCH_ICP(dev_priv), "Unrecognized PCH type 0x%x\n", @@ -1912,7 +1859,6 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; - tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; } if (ddi_hotplug_trigger) { @@ -1936,7 +1882,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, tc_hotplug_trigger, dig_hotplug_reg, dev_priv->hotplug.pch_hpd, - tc_port_hotplug_long_detect); + icp_tc_port_hotplug_long_detect); } if (pin_mask) @@ -2184,12 +2130,6 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) u32 pin_mask = 0, long_mask = 0; u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK; u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK; - long_pulse_detect_func long_pulse_detect; - - if (INTEL_GEN(dev_priv) >= 12) - long_pulse_detect = gen12_port_hotplug_long_detect; - else - long_pulse_detect = gen11_port_hotplug_long_detect; if (trigger_tc) { u32 dig_hotplug_reg; @@ -2200,7 +2140,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc, dig_hotplug_reg, dev_priv->hotplug.hpd, - long_pulse_detect); + gen11_port_hotplug_long_detect); } if (trigger_tbt) { @@ -2212,7 +2152,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt, dig_hotplug_reg, dev_priv->hotplug.hpd, - long_pulse_detect); + gen11_port_hotplug_long_detect); } if (pin_mask) @@ -3047,6 +2987,18 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } +static u32 intel_hpd_hotplug_irqs(struct drm_i915_private *dev_priv, + const u32 hpd[HPD_NUM_PINS]) +{ + struct intel_encoder *encoder; + u32 hotplug_irqs = 0; + + for_each_intel_encoder(&dev_priv->drm, encoder) + hotplug_irqs |= hpd[encoder->hpd_pin]; + + return hotplug_irqs; +} + static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3076,50 +3028,50 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (HAS_PCH_IBX(dev_priv)) - hotplug_irqs = SDE_HOTPLUG_MASK; - else - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, 
dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); ibx_hpd_detection_setup(dev_priv); } -static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, - u32 ddi_hotplug_enable_mask, - u32 tc_hotplug_enable_mask) +static void icp_ddi_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enable_mask) { u32 hotplug; hotplug = I915_READ(SHOTPLUG_CTL_DDI); - hotplug |= ddi_hotplug_enable_mask; + hotplug |= enable_mask; I915_WRITE(SHOTPLUG_CTL_DDI, hotplug); +} - if (tc_hotplug_enable_mask) { - hotplug = I915_READ(SHOTPLUG_CTL_TC); - hotplug |= tc_hotplug_enable_mask; - I915_WRITE(SHOTPLUG_CTL_TC, hotplug); - } +static void icp_tc_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enable_mask) +{ + u32 hotplug; + + hotplug = I915_READ(SHOTPLUG_CTL_TC); + hotplug |= enable_mask; + I915_WRITE(SHOTPLUG_CTL_TC, hotplug); } static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, - u32 sde_ddi_mask, u32 sde_tc_mask, u32 ddi_enable_mask, u32 tc_enable_mask) { u32 hotplug_irqs, enabled_irqs; - hotplug_irqs = sde_ddi_mask | sde_tc_mask; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.pch_hpd); if (INTEL_PCH_TYPE(dev_priv) <= PCH_TGP) I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask); + icp_ddi_hpd_detection_setup(dev_priv, ddi_enable_mask); + if (tc_enable_mask) + icp_tc_hpd_detection_setup(dev_priv, tc_enable_mask); } /* @@ -3129,7 +3081,6 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1)); } @@ -3141,7 +3092,6 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_TGP, 0, TGP_DDI_HPD_ENABLE_MASK, 0); } @@ -3153,14 +3103,18 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) hotplug |= GEN11_HOTPLUG_CTL_ENABLE(PORT_TC1) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC2) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC3) | - GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4); + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC5) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC6); I915_WRITE(GEN11_TC_HOTPLUG_CTL, hotplug); hotplug = I915_READ(GEN11_TBT_HOTPLUG_CTL); hotplug |= GEN11_HOTPLUG_CTL_ENABLE(PORT_TC1) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC2) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC3) | - GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4); + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC5) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC6); I915_WRITE(GEN11_TBT_HOTPLUG_CTL, hotplug); } @@ -3170,7 +3124,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) u32 val; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK; + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; @@ -3181,10 +3135,10 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) gen11_hpd_detection_setup(dev_priv); if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, 
SDE_TC_MASK_TGP, + icp_hpd_irq_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, + icp_hpd_irq_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); } @@ -3220,8 +3174,8 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); - hotplug_irqs = SDE_HOTPLUG_MASK_SPT; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3248,22 +3202,13 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (INTEL_GEN(dev_priv) >= 8) { - hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); + if (INTEL_GEN(dev_priv) >= 8) bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); - } else if (INTEL_GEN(dev_priv) >= 7) { - hotplug_irqs = DE_DP_A_HOTPLUG_IVB; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - - ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); - } else { - hotplug_irqs = DE_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - + else ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); - } ilk_hpd_detection_setup(dev_priv); @@ -3312,7 +3257,7 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); @@ -3533,17 +3478,18 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv) gen3_assert_iir_is_zero(&dev_priv->uncore, SDEIIR); I915_WRITE(SDEIMR, ~mask); - if (HAS_PCH_TGP(dev_priv)) - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK); - else if (HAS_PCH_JSP(dev_priv)) - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); - else if (HAS_PCH_MCC(dev_priv)) - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE(PORT_TC1)); - else - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK); + if (HAS_PCH_TGP(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, TGP_TC_HPD_ENABLE_MASK); + } else if (HAS_PCH_JSP(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK); + } else if (HAS_PCH_MCC(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, ICP_TC_HPD_ENABLE(PORT_TC1)); + } else { + icp_ddi_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, ICP_TC_HPD_ENABLE_MASK); + } } static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8d8db9ff0a48..7f139ea4a90b 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -102,6 +102,11 @@ 
i915_param_named(psr_safest_params, bool, 0400, "is helpful to detect if PSR issues are related to bad values set in " " VBT. (0=use VBT parameters, 1=use safest parameters)"); +i915_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400, + "Enable PSR2 selective fetch " + "(0=disabled, 1=enabled) " + "Default: 0"); + i915_param_named_unsafe(force_probe, charp, 0400, "Force probe the driver for specified devices. " "See CONFIG_DRM_I915_FORCE_PROBE for details."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 53fb5ba8fbed..330c03e2b4f7 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -54,6 +54,7 @@ struct drm_printer; param(int, enable_fbc, -1, 0600) \ param(int, enable_psr, -1, 0600) \ param(bool, psr_safest_params, false, 0600) \ + param(bool, enable_psr2_sel_fetch, false, 0600) \ param(int, disable_power_well, -1, 0400) \ param(int, enable_ips, 1, 0600) \ param(int, invert_brightness, 0, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2338f92ce490..366ddfc8df6b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -890,6 +890,7 @@ static const struct intel_device_info rkl_info = { .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), .require_force_probe = 1, + .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c6f6370283cf..e94976976571 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) struct i915_gem_engines_iter it; struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; - int err; + struct i915_gem_ww_ctx ww; + int err = -ENODEV; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (ce->engine != stream->engine) /* first match! */ continue; - /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. - */ - err = intel_context_pin(ce); - if (err == 0) { - stream->pinned_ctx = ce; - break; - } + err = 0; + break; } i915_gem_context_unlock_engines(ctx); + if (err) + return ERR_PTR(err); + + i915_gem_ww_ctx_init(&ww, true); +retry: + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. 
+ */ + err = intel_context_pin_ww(ce, &ww); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) + return ERR_PTR(err); + + stream->pinned_ctx = ce; return stream->pinned_ctx; } @@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream, { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; int err; vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err) + goto err; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_vma_put; + goto err; intel_engine_pm_get(ce->engine); rq = i915_request_create(ce); @@ -1953,11 +1975,9 @@ emit_oa_config(struct i915_perf_stream *stream, goto err_add_request; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, 0); if (!err) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); if (err) goto err_add_request; @@ -1971,7 +1991,14 @@ err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); -err_vma_put: +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + + i915_gem_ww_ctx_fini(&ww); i915_vma_put(vma); return err; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4e796ff4d7d0..d805d4da6181 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1382,7 +1382,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define DPIO_CMNRST (1 << 0) #define DPIO_PHY(pipe) ((pipe) >> 1) -#define DPIO_PHY_IOSF_PORT(phy) (dev_priv->dpio_phy_iosf_port[phy]) /* * Per pipe/PLL DPIO regs @@ -1898,6 +1897,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define PWR_DOWN_LN_3_1_0 (0xb << 4) #define PWR_DOWN_LN_MASK (0xf << 4) #define PWR_DOWN_LN_SHIFT 4 +#define EDP4K2K_MODE_OVRD_EN (1 << 3) +#define EDP4K2K_MODE_OVRD_OPTIMIZED (1 << 2) #define ICL_PORT_CL_DW12(phy) _MMIO(_ICL_PORT_CL_DW(12, phy)) #define ICL_LANE_ENABLE_AUX (1 << 0) @@ -2919,6 +2920,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MBUS_BBOX_CTL_S1 _MMIO(0x45040) #define MBUS_BBOX_CTL_S2 _MMIO(0x45044) +#define HDPORT_STATE _MMIO(0x45050) +#define HDPORT_DPLL_USED_MASK REG_GENMASK(14, 12) +#define HDPORT_PHY_USED_DP(phy) REG_BIT(2 * (phy) + 2) +#define HDPORT_PHY_USED_HDMI(phy) REG_BIT(2 * (phy) + 1) +#define HDPORT_ENABLED REG_BIT(0) + /* Make render/texture TLB fetches lower priorty than associated data * fetches. 
This is not turned on by default */ @@ -7752,32 +7759,20 @@ enum { #define GEN11_DE_HPD_IMR _MMIO(0x44474) #define GEN11_DE_HPD_IIR _MMIO(0x44478) #define GEN11_DE_HPD_IER _MMIO(0x4447c) -#define GEN12_TC6_HOTPLUG (1 << 21) -#define GEN12_TC5_HOTPLUG (1 << 20) -#define GEN11_TC4_HOTPLUG (1 << 19) -#define GEN11_TC3_HOTPLUG (1 << 18) -#define GEN11_TC2_HOTPLUG (1 << 17) -#define GEN11_TC1_HOTPLUG (1 << 16) #define GEN11_TC_HOTPLUG(tc_port) (1 << ((tc_port) + 16)) -#define GEN11_DE_TC_HOTPLUG_MASK (GEN12_TC6_HOTPLUG | \ - GEN12_TC5_HOTPLUG | \ - GEN11_TC4_HOTPLUG | \ - GEN11_TC3_HOTPLUG | \ - GEN11_TC2_HOTPLUG | \ - GEN11_TC1_HOTPLUG) -#define GEN12_TBT6_HOTPLUG (1 << 5) -#define GEN12_TBT5_HOTPLUG (1 << 4) -#define GEN11_TBT4_HOTPLUG (1 << 3) -#define GEN11_TBT3_HOTPLUG (1 << 2) -#define GEN11_TBT2_HOTPLUG (1 << 1) -#define GEN11_TBT1_HOTPLUG (1 << 0) +#define GEN11_DE_TC_HOTPLUG_MASK (GEN11_TC_HOTPLUG(PORT_TC6) | \ + GEN11_TC_HOTPLUG(PORT_TC5) | \ + GEN11_TC_HOTPLUG(PORT_TC4) | \ + GEN11_TC_HOTPLUG(PORT_TC3) | \ + GEN11_TC_HOTPLUG(PORT_TC2) | \ + GEN11_TC_HOTPLUG(PORT_TC1)) #define GEN11_TBT_HOTPLUG(tc_port) (1 << (tc_port)) -#define GEN11_DE_TBT_HOTPLUG_MASK (GEN12_TBT6_HOTPLUG | \ - GEN12_TBT5_HOTPLUG | \ - GEN11_TBT4_HOTPLUG | \ - GEN11_TBT3_HOTPLUG | \ - GEN11_TBT2_HOTPLUG | \ - GEN11_TBT1_HOTPLUG) +#define GEN11_DE_TBT_HOTPLUG_MASK (GEN11_TBT_HOTPLUG(PORT_TC6) | \ + GEN11_TBT_HOTPLUG(PORT_TC5) | \ + GEN11_TBT_HOTPLUG(PORT_TC4) | \ + GEN11_TBT_HOTPLUG(PORT_TC3) | \ + GEN11_TBT_HOTPLUG(PORT_TC2) | \ + GEN11_TBT_HOTPLUG(PORT_TC1)) #define GEN11_TBT_HOTPLUG_CTL _MMIO(0x44030) #define GEN11_TC_HOTPLUG_CTL _MMIO(0x44038) @@ -7870,6 +7865,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) +#define DIS_RAM_BYPASS_PSR2_MAN_TRACK (1 << 16) #define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) #define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) @@ -8711,6 +8707,7 @@ enum { #define PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1 << 31) #define PCH_DPLUNIT_CLOCK_GATE_DISABLE (1 << 30) #define PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1 << 29) +#define PCH_DPMGUNIT_CLOCK_GATE_DISABLE (1 << 15) #define PCH_CPUNIT_CLOCK_GATE_DISABLE (1 << 14) #define CNP_PWM_CGE_GATING_DISABLE (1 << 13) #define PCH_LP_PARTITION_LEVEL_DISABLE (1 << 12) @@ -9217,8 +9214,8 @@ enum { #define DISPLAY_IPS_CONTROL 0x19 #define TGL_PCODE_TCCOLD 0x26 #define TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED REG_BIT(0) -#define TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ 0 -#define TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ REG_BIT(0) +#define TGL_PCODE_EXIT_TCCOLD_DATA_L_BLOCK_REQ 0 +#define TGL_PCODE_EXIT_TCCOLD_DATA_L_UNBLOCK_REQ REG_BIT(0) /* See also IPS_CTL */ #define IPS_PCODE_CONTROL (1 << 30) #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A @@ -9305,6 +9302,7 @@ enum { #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) #define GEN10_SAMPLER_MODE _MMIO(0xE18C) +#define ENABLE_SMALLPL REG_BIT(15) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) /* IVYBRIDGE DPF */ @@ -10277,12 +10275,18 @@ enum skl_power_gate { #define ICL_DPCLKA_CFGCR0 _MMIO(0x164280) #define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) (1 << _PICK(phy, 10, 11, 24)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) REG_BIT((phy) + 10) #define ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port) (1 << ((tc_port) < PORT_TC4 ? 
\ (tc_port) + 12 : \ (tc_port) - PORT_TC4 + 21)) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) ((phy) * 2) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (3 << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) ((pll) << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) _PICK(phy, 0, 2, 4, 27) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) \ + (3 << RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) \ + ((pll) << RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) /* CNL PLL */ #define DPLL0_ENABLE 0x46010 @@ -10503,19 +10507,21 @@ enum skl_power_gate { #define _TGL_DPLL0_CFGCR0 0x164284 #define _TGL_DPLL1_CFGCR0 0x16428C -/* TODO: add DPLL4 */ #define _TGL_TBTPLL_CFGCR0 0x16429C #define TGL_DPLL_CFGCR0(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR0, \ _TGL_DPLL1_CFGCR0, \ _TGL_TBTPLL_CFGCR0) +#define RKL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _TGL_DPLL0_CFGCR0, \ + _TGL_DPLL1_CFGCR0) #define _TGL_DPLL0_CFGCR1 0x164288 #define _TGL_DPLL1_CFGCR1 0x164290 -/* TODO: add DPLL4 */ #define _TGL_TBTPLL_CFGCR1 0x1642A0 #define TGL_DPLL_CFGCR1(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR1, \ _TGL_DPLL1_CFGCR1, \ _TGL_TBTPLL_CFGCR1) +#define RKL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _TGL_DPLL0_CFGCR1, \ + _TGL_DPLL1_CFGCR1) #define _DKL_PHY1_BASE 0x168000 #define _DKL_PHY2_BASE 0x169000 @@ -12336,4 +12342,10 @@ enum skl_power_gate { #define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) +#define TGL_ROOT_DEVICE_ID 0x9A00 +#define TGL_ROOT_DEVICE_MASK 0xFF00 +#define TGL_ROOT_DEVICE_SKU_MASK 0xF +#define TGL_ROOT_DEVICE_SKU_ULX 0x2 +#define TGL_ROOT_DEVICE_SKU_ULT 0x4 + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 781a6783affe..0e813819b041 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,6 +31,7 @@ #include <linux/sched/signal.h> #include "gem/i915_gem_context.h" +#include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_ring.h" #include "gt/intel_rps.h" @@ -186,48 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk) irq_execute_cb(wrk); } -static void __notify_execute_cb(struct i915_request *rq) +static __always_inline void +__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) { struct execute_cb *cb, *cn; - lockdep_assert_held(&rq->lock); - - GEM_BUG_ON(!i915_request_is_active(rq)); if (llist_empty(&rq->execute_cb)) return; - llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode) - irq_work_queue(&cb->work); - - /* - * XXX Rollback on __i915_request_unsubmit() - * - * In the future, perhaps when we have an active time-slicing scheduler, - * it will be interesting to unsubmit parallel execution and remove - * busywaits from the GPU until their master is restarted. This is - * quite hairy, we have to carefully rollback the fence and do a - * preempt-to-idle cycle on the target engine, all the while the - * master execute_cb may refire. 
- */ - init_llist_head(&rq->execute_cb); + llist_for_each_entry_safe(cb, cn, + llist_del_all(&rq->execute_cb), + work.llnode) + fn(&cb->work); } -static inline void -remove_from_client(struct i915_request *request) +static void __notify_execute_cb_irq(struct i915_request *rq) { - struct drm_i915_file_private *file_priv; + __notify_execute_cb(rq, irq_work_queue); +} - if (!READ_ONCE(request->file_priv)) - return; +static bool irq_work_imm(struct irq_work *wrk) +{ + wrk->func(wrk); + return false; +} - rcu_read_lock(); - file_priv = xchg(&request->file_priv, NULL); - if (file_priv) { - spin_lock(&file_priv->mm.lock); - list_del(&request->client_link); - spin_unlock(&file_priv->mm.lock); - } - rcu_read_unlock(); +static void __notify_execute_cb_imm(struct i915_request *rq) +{ + __notify_execute_cb(rq, irq_work_imm); } static void free_capture_list(struct i915_request *request) @@ -274,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq) locked = engine; } list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + spin_unlock_irq(&locked->active.lock); + + __notify_execute_cb_imm(rq); } bool i915_request_retire(struct i915_request *rq) @@ -288,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); + i915_request_mark_complete(rq); /* * We know the GPU must have read the request to have @@ -305,32 +300,30 @@ bool i915_request_retire(struct i915_request *rq) __i915_request_fill(rq, POISON_FREE); rq->ring->head = rq->postfix; + if (!i915_request_signaled(rq)) { + spin_lock_irq(&rq->lock); + dma_fence_signal_locked(&rq->fence); + spin_unlock_irq(&rq->lock); + } + + if (i915_request_has_waitboost(rq)) { + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + atomic_dec(&rq->engine->gt->rps.num_waiters); + } + /* * We only loosely track inflight requests across preemption, * and so we may find ourselves attempting to retire a _completed_ * request that we have removed from the HW and put back on a run * queue. + * + * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be + * after removing the breadcrumb and signaling it, so that we do not + * inadvertently attach the breadcrumb to a completed request. 
*/ remove_from_engine(rq); - - spin_lock_irq(&rq->lock); - i915_request_mark_complete(rq); - if (!i915_request_signaled(rq)) - dma_fence_signal_locked(&rq->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - i915_request_cancel_breadcrumb(rq); - if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); - atomic_dec(&rq->engine->gt->rps.num_waiters); - } - if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - __notify_execute_cb(rq); - } GEM_BUG_ON(!llist_empty(&rq->execute_cb)); - spin_unlock_irq(&rq->lock); - remove_from_client(rq); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ intel_context_exit(rq->context); @@ -357,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } -static void __llist_add(struct llist_node *node, struct llist_head *head) -{ - node->next = head->first; - head->first = node; -} - static struct i915_request * const * __engine_active(struct intel_engine_cs *engine) { @@ -460,18 +447,24 @@ __await_execution(struct i915_request *rq, cb->work.func = irq_execute_cb_hook; } - spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal) || __request_in_flight(signal)) { - if (hook) { - hook(rq, &signal->fence); - i915_request_put(signal); - } - i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); - } else { - __llist_add(&cb->work.llnode, &signal->execute_cb); + /* + * Register the callback first, then see if the signaler is already + * active. This ensures that if we race with the + * __notify_execute_cb from i915_request_submit() and we are not + * included in that list, we get a second bite of the cherry and + * execute it ourselves. After this point, a future + * i915_request_submit() will notify us. + * + * In i915_request_retire() we set the ACTIVE bit on a completed + * request (then flush the execute_cb). So by registering the + * callback first, then checking the ACTIVE bit, we serialise with + * the completed/retired request. + */ + if (llist_add(&cb->work.llnode, &signal->execute_cb)) { + if (i915_request_is_active(signal) || + __request_in_flight(signal)) + __notify_execute_cb_imm(signal); } - spin_unlock_irq(&signal->lock); return 0; } @@ -549,8 +542,13 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; + if (unlikely(intel_context_is_closed(request->context) && + !intel_engine_has_heartbeat(engine))) + intel_context_set_banned(request->context); + if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); + if (unlikely(fatal_error(request->fence.error))) __i915_request_skip(request); @@ -587,19 +585,21 @@ xfer: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); } - /* We may be recursing from the signal callback of another i915 fence */ - if (!i915_request_signaled(request)) { - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - - __notify_execute_cb(request); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &request->fence.flags) && - !i915_request_enable_breadcrumb(request)) - intel_engine_signal_breadcrumbs(engine); + /* + * XXX Rollback bonded-execution on __i915_request_unsubmit()? + * + * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. 
This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. + */ + __notify_execute_cb_irq(request); - spin_unlock(&request->lock); - GEM_BUG_ON(!llist_empty(&request->execute_cb)); - } + /* We may be recursing from the signal callback of another i915 fence */ + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + i915_request_enable_breadcrumb(request); return result; } @@ -621,27 +621,27 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + /* + * Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); /* - * Only unwind in reverse order, required so that the per-context list - * is kept in seqno/ring order. + * Before we remove this breadcrumb from the signal list, we have + * to ensure that a concurrent dma_fence_enable_signaling() does not + * attach itself. We first mark the request as no longer active and + * make sure that is visible to other cores, and then remove the + * breadcrumb if attached. */ - - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - - spin_unlock(&request->lock); - /* We've already spun, don't charge on resubmitting. */ if (request->sched.semaphores && i915_request_started(request)) request->sched.semaphores = 0; @@ -778,7 +778,6 @@ static void __i915_request_ctor(void *arg) dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); - rq->file_priv = NULL; rq->capture_list = NULL; init_llist_head(&rq->execute_cb); @@ -868,7 +867,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) /* No zalloc, everything must be cleared after use */ rq->batch = NULL; - GEM_BUG_ON(rq->file_priv); GEM_BUG_ON(rq->capture_list); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); @@ -1661,7 +1659,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request * const rq, int state) +static bool __i915_spin_request(struct i915_request * const rq, int state) { unsigned long timeout_ns; unsigned int cpu; @@ -1694,7 +1692,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state) timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); timeout_ns += local_clock_ns(&cpu); do { - if (i915_request_completed(rq)) + if (dma_fence_is_signaled(&rq->fence)) return true; if (signal_pending_state(state, current)) @@ -1718,7 +1716,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) { struct request_wait *wait = container_of(cb, typeof(*wait), cb); - wake_up_process(wait->tsk); + wake_up_process(fetch_and_zero(&wait->tsk)); } /** @@ -1787,10 +1785,8 @@ long i915_request_wait(struct i915_request *rq, * duration, which we currently lack. 
*/ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && - __i915_spin_request(rq, state)) { - dma_fence_signal(&rq->fence); + __i915_spin_request(rq, state)) goto out; - } /* * This client is about to stall waiting for the GPU. In many cases @@ -1804,25 +1800,36 @@ long i915_request_wait(struct i915_request *rq, * but at a cost of spending more power processing the workload * (bad for battery). */ - if (flags & I915_WAIT_PRIORITY) { - if (!i915_request_started(rq) && - INTEL_GEN(rq->engine->i915) >= 6) - intel_rps_boost(rq); - } + if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) + intel_rps_boost(rq); wait.tsk = current; if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) goto out; + /* + * Flush the submission tasklet, but only if it may help this request. + * + * We sometimes experience some latency between the HW interrupts and + * tasklet execution (mostly due to ksoftirqd latency, but it can also + * be due to lazy CS events), so lets run the tasklet manually if there + * is a chance it may submit this request. If the request is not ready + * to run, as it is waiting for other fences to be signaled, flushing + * the tasklet is busy work without any advantage for this client. + * + * If the HW is being lazy, this is the last chance before we go to + * sleep to catch any pending events. We will check periodically in + * the heartbeat to flush the submission tasklets as a last resort + * for unhappy HW. + */ + if (i915_request_is_ready(rq)) + intel_engine_flush_submission(rq->engine); + for (;;) { set_current_state(state); - if (i915_request_completed(rq)) { - dma_fence_signal(&rq->fence); + if (dma_fence_is_signaled(&rq->fence)) break; - } - - intel_engine_flush_submission(rq->engine); if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1838,7 +1845,9 @@ long i915_request_wait(struct i915_request *rq, } __set_current_state(TASK_RUNNING); - dma_fence_remove_callback(&rq->fence, &wait.cb); + if (READ_ONCE(wait.tsk)) + dma_fence_remove_callback(&rq->fence, &wait.cb); + GEM_BUG_ON(!list_empty(&wait.cb.node)); out: mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 590762820761..16b721080195 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -284,10 +284,6 @@ struct i915_request { /** timeline->request entry for this request */ struct list_head link; - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_link; - I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; @@ -365,10 +361,6 @@ void i915_request_submit(struct i915_request *request); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); -/* Note: part of the intel_breadcrumbs family */ -bool i915_request_enable_breadcrumb(struct i915_request *request); -void i915_request_cancel_breadcrumb(struct i915_request *request); - long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index ed2be3489f8e..7b64e7137270 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -34,17 +34,25 @@ static void i915_save_display(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; + /* Display arbitration control */ if 
(INTEL_GEN(dev_priv) <= 4) dev_priv->regfile.saveDSPARB = I915_READ(DSPARB); - /* save FBC interval */ - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) - dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL); + if (IS_GEN(dev_priv, 4)) + pci_read_config_word(pdev, GCDGMBUS, + &dev_priv->regfile.saveGCDGMBUS); } static void i915_restore_display(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; + + if (IS_GEN(dev_priv, 4)) + pci_write_config_word(pdev, GCDGMBUS, + dev_priv->regfile.saveGCDGMBUS); + /* Display arbitration */ if (INTEL_GEN(dev_priv) <= 4) I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB); @@ -52,31 +60,17 @@ static void i915_restore_display(struct drm_i915_private *dev_priv) /* only restore FBC info on the platform that supports FBC*/ intel_fbc_global_disable(dev_priv); - /* restore FBC interval */ - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) - I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL); - intel_vga_redisable(dev_priv); + + intel_gmbus_reset(dev_priv); } int i915_save_state(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; int i; i915_save_display(dev_priv); - if (IS_GEN(dev_priv, 4)) - pci_read_config_word(pdev, GCDGMBUS, - &dev_priv->regfile.saveGCDGMBUS); - - /* Cache mode state */ - if (INTEL_GEN(dev_priv) < 7) - dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); - - /* Memory Arbitration state */ - dev_priv->regfile.saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); - /* Scratch space */ if (IS_GEN(dev_priv, 2) && IS_MOBILE(dev_priv)) { for (i = 0; i < 7; i++) { @@ -102,22 +96,10 @@ int i915_save_state(struct drm_i915_private *dev_priv) int i915_restore_state(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; int i; - if (IS_GEN(dev_priv, 4)) - pci_write_config_word(pdev, GCDGMBUS, - dev_priv->regfile.saveGCDGMBUS); i915_restore_display(dev_priv); - /* Cache mode state */ - if (INTEL_GEN(dev_priv) < 7) - I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 | - 0xffff0000); - - /* Memory arbitration state */ - I915_WRITE(MI_ARB_STATE, dev_priv->regfile.saveMI_ARB_STATE | 0xffff0000); - /* Scratch space */ if (IS_GEN(dev_priv, 2) && IS_MOBILE(dev_priv)) { for (i = 0; i < 7; i++) { @@ -138,7 +120,5 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - intel_gmbus_reset(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bc64f773dcdb..ffb5287e055a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -291,6 +291,8 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma_work { struct dma_fence_work base; + struct i915_address_space *vm; + struct i915_vm_pt_stash stash; struct i915_vma *vma; struct drm_i915_gem_object *pinned; struct i915_sw_dma_fence_cb cb; @@ -302,13 +304,10 @@ static int __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma *vma = vw->vma; - int err; - - err = vma->ops->bind_vma(vma->vm, vma, vw->cache_level, vw->flags); - if (err) - atomic_or(I915_VMA_ERROR, &vma->flags); - return err; + vma->ops->bind_vma(vw->vm, &vw->stash, + vma, vw->cache_level, vw->flags); + return 0; } static void __vma_release(struct dma_fence_work *work) @@ -317,6 +316,9 @@ static void __vma_release(struct dma_fence_work *work) if (vw->pinned) 
__i915_gem_object_unpin_pages(vw->pinned); + + i915_vm_free_pt_stash(vw->vm, &vw->stash); + i915_vm_put(vw->vm); } static const struct dma_fence_work_ops bind_ops = { @@ -376,7 +378,6 @@ int i915_vma_bind(struct i915_vma *vma, { u32 bind_flags; u32 vma_flags; - int ret; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->size > vma->node.size); @@ -433,9 +434,7 @@ int i915_vma_bind(struct i915_vma *vma, work->pinned = vma->obj; } } else { - ret = vma->ops->bind_vma(vma->vm, vma, cache_level, bind_flags); - if (ret) - return ret; + vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } atomic_or(bind_flags, &vma->flags); @@ -853,13 +852,19 @@ static void vma_unbind_pages(struct i915_vma *vma) __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); } -int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u64 size, u64 alignment, u64 flags) { struct i915_vma_work *work = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; +#ifdef CONFIG_PROVE_LOCKING + if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex)) + WARN_ON(!ww); +#endif + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); @@ -873,16 +878,32 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (err) return err; + if (flags & PIN_GLOBAL) + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + if (flags & vma->vm->bind_async_flags) { work = i915_vma_work(); if (!work) { err = -ENOMEM; - goto err_pages; + goto err_rpm; } - } - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + work->vm = i915_vm_get(vma->vm); + + /* Allocate enough page directories to used PTE */ + if (vma->vm->allocate_va_range) { + err = i915_vm_alloc_pt_stash(vma->vm, + &work->stash, + vma->size); + if (err) + goto err_fence; + + err = i915_vm_pin_pt_stash(vma->vm, + &work->stash); + if (err) + goto err_fence; + } + } /* * Differentiate between user/kernel vma inside the aliasing-ppgtt. @@ -971,9 +992,9 @@ err_unlock: err_fence: if (work) dma_fence_work_commit_imm(&work->base); +err_rpm: if (wakeref) intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); -err_pages: vma_put_pages(vma); return err; } @@ -989,7 +1010,8 @@ static void flush_idle_contexts(struct intel_gt *gt) intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } -int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags) { struct i915_address_space *vm = vma->vm; int err; @@ -997,7 +1019,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); do { - err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); + err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); if (err != -ENOSPC) { if (!err) { err = i915_vma_wait_for_bind(vma); @@ -1167,6 +1189,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +static int +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) +{ + return __i915_request_await_exclusive(rq, &vma->active); +} + int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) { int err; @@ -1174,8 +1202,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) GEM_BUG_ON(!i915_vma_is_pinned(vma)); /* Wait for the vma to be bound before we start! 
*/ - err = i915_request_await_active(rq, &vma->active, - I915_ACTIVE_AWAIT_EXCL); + err = __i915_request_await_bind(rq, vma); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index d0d01f909548..5b3a3c653454 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma) } int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); -int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags); +i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u64 size, u64 alignment, u64 flags); + +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + return i915_vma_pin_ww(vma, NULL, size, alignment, flags); +} + +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 40c590db3c76..adc836f15fde 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -346,6 +346,25 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) mask = BIT(INTEL_SUBPLATFORM_PORTF); } + if (IS_TIGERLAKE(i915)) { + struct pci_dev *root, *pdev = i915->drm.pdev; + + root = list_first_entry(&pdev->bus->devices, typeof(*root), bus_list); + + drm_WARN_ON(&i915->drm, mask); + drm_WARN_ON(&i915->drm, (root->device & TGL_ROOT_DEVICE_MASK) != + TGL_ROOT_DEVICE_ID); + + switch (root->device & TGL_ROOT_DEVICE_SKU_MASK) { + case TGL_ROOT_DEVICE_SKU_ULX: + mask = BIT(INTEL_SUBPLATFORM_ULX); + break; + case TGL_ROOT_DEVICE_SKU_ULT: + mask = BIT(INTEL_SUBPLATFORM_ULT); + break; + } + } + GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_BITS); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; @@ -497,6 +516,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) S32_MAX), USEC_PER_SEC)); } + + if (!HAS_DISPLAY(dev_priv)) { + dev_priv->drm.driver_features &= ~(DRIVER_MODESET | + DRIVER_ATOMIC); + memset(&info->display, 0, sizeof(info->display)); + memset(runtime->num_sprites, 0, sizeof(runtime->num_sprites)); + memset(runtime->num_scalers, 0, sizeof(runtime->num_scalers)); + } } void intel_driver_caps_print(const struct intel_driver_caps *caps, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index fd2385457ab6..6a3d607218aa 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -146,6 +146,7 @@ enum intel_ppgtt_type { func(has_gmch); \ func(has_hdcp); \ func(has_hotplug); \ + func(has_hti); \ func(has_ipc); \ func(has_modular_fia); \ func(has_overlay); \ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cfabbe0481ab..34e0d22d456b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -100,12 +100,6 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_MEMORY_WAKE); - - if (IS_SKYLAKE(dev_priv)) { - /* WaDisableDopClockGating */ - I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) - & ~GEN7_DOP_CLOCK_GATE_ENABLE); - } } static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7142,7 +7136,7 @@ static void tgl_init_clock_gating(struct drm_i915_private 
*dev_priv) I915_READ(POWERGATE_ENABLE) | vd_pg_enable); /* Wa_1409825376:tgl (pre-prod)*/ - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B1)) I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); @@ -7223,12 +7217,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_B0)) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_B0)) I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7251,6 +7245,10 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) { gen9_init_clock_gating(dev_priv); + /* WaDisableDopClockGating:skl */ + I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) & + ~GEN7_DOP_CLOCK_GATE_ENABLE); + /* WAC6entrylatency:skl */ I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | FBC_LLC_FULLY_OPEN); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 916ccd1c0e96..5b3279262123 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -231,9 +231,21 @@ void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) SB_CRWRDA_NP, reg, &val); } +static u32 vlv_dpio_phy_iosf_port(struct drm_i915_private *i915, enum dpio_phy phy) +{ + /* + * IOSF_PORT_DPIO: VLV x2 PHY (DP/HDMI B and C), CHV x1 PHY (DP/HDMI D) + * IOSF_PORT_DPIO_2: CHV x2 PHY (DP/HDMI B and C) + */ + if (IS_CHERRYVIEW(i915)) + return phy == DPIO_PHY0 ? 
IOSF_PORT_DPIO_2 : IOSF_PORT_DPIO; + else + return IOSF_PORT_DPIO; +} + u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) { - int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); u32 val = 0; vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val); @@ -252,7 +264,7 @@ u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) void vlv_dpio_write(struct drm_i915_private *i915, enum pipe pipe, int reg, u32 val) { - int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val); } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8d5a933e6af6..263ffcb832b7 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1993,13 +1993,14 @@ int __intel_wait_for_register_fw(struct intel_uncore *uncore, unsigned int slow_timeout_ms, u32 *out_value) { - u32 reg_value; + u32 reg_value = 0; #define done (((reg_value = intel_uncore_read_fw(uncore, reg)) & mask) == value) int ret; /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); + GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms); ret = -ETIMEDOUT; if (fast_timeout_us && fast_timeout_us <= 20000) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 88d400b9df88..23a6132c5f4e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -199,11 +199,52 @@ out: return err; } +static int igt_gem_ww_ctx(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *obj2; + struct i915_gem_ww_ctx ww; + int err = 0; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto put1; + } + + i915_gem_ww_ctx_init(&ww, true); +retry: + /* Lock the objects, twice for good measure (-EALREADY handling) */ + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_lock_interruptible(obj, &ww); + if (!err) + err = i915_gem_object_lock_interruptible(obj2, &ww); + if (!err) + err = i915_gem_object_lock(obj2, &ww); + + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + i915_gem_object_put(obj2); +put1: + i915_gem_object_put(obj); + return err; +} + int i915_gem_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_suspend), SUBTEST(igt_gem_hibernate), + SUBTEST(igt_gem_ww_ctx), }; if (intel_gt_is_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 028baae9631f..f88473d396f4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -536,7 +536,7 @@ int i915_gem_evict_mock_selftests(void) with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, &i915->gt); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 0016ffc7d914..c53a222e3dec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -172,35 +172,45 @@ static int igt_ppgtt_alloc(void *arg) /* Check we can allocate the entire range */ for (size = 4096; size <= limit; size <<= 2) { - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size); + struct i915_vm_pt_stash stash = {}; + + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size); + if (err) + goto err_ppgtt_cleanup; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); if (err) { - if (err == -ENOMEM) { - pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", - size, ilog2(size)); - err = 0; /* virtual space too large! */ - } + i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; } + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size); cond_resched(); ppgtt->vm.clear_range(&ppgtt->vm, 0, size); + + i915_vm_free_pt_stash(&ppgtt->vm, &stash); } /* Check we can incrementally allocate the entire range */ for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) { - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, - last, size - last); + struct i915_vm_pt_stash stash = {}; + + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last); + if (err) + goto err_ppgtt_cleanup; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); if (err) { - if (err == -ENOMEM) { - pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", - last, size - last, ilog2(size)); - err = 0; /* virtual space too large! */ - } + i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; } + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, + last, size - last); cond_resched(); + + i915_vm_free_pt_stash(&ppgtt->vm, &stash); } err_ppgtt_cleanup: @@ -284,9 +294,23 @@ static int lowlevel_hole(struct i915_address_space *vm, break; } - if (vm->allocate_va_range && - vm->allocate_va_range(vm, addr, BIT_ULL(size))) - break; + if (vm->allocate_va_range) { + struct i915_vm_pt_stash stash = {}; + + if (i915_vm_alloc_pt_stash(vm, &stash, + BIT_ULL(size))) + break; + + if (i915_vm_pin_pt_stash(vm, &stash)) { + i915_vm_free_pt_stash(vm, &stash); + break; + } + + vm->allocate_va_range(vm, &stash, + addr, BIT_ULL(size)); + + i915_vm_free_pt_stash(vm, &stash); + } mock_vma->pages = obj->mm.pages; mock_vma->node.size = BIT_ULL(size); @@ -1703,7 +1727,7 @@ int i915_gem_gtt_mock_selftests(void) mock_fini_ggtt(ggtt); kfree(ggtt); out_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -1881,6 +1905,7 @@ static int igt_cs_tlb(void *arg) continue; while (!__igt_timeout(end_time, NULL)) { + struct i915_vm_pt_stash stash = {}; struct i915_request *rq; u64 offset; @@ -1888,10 +1913,6 @@ static int igt_cs_tlb(void *arg) 0, vm->total - PAGE_SIZE, chunk_size, PAGE_SIZE); - err = vm->allocate_va_range(vm, offset, chunk_size); - if (err) - goto end; - memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); vma = i915_vma_instance(bbe, vm, NULL); @@ -1904,6 +1925,20 @@ static int igt_cs_tlb(void *arg) if (err) goto end; + err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size); + if (err) + goto end; + + err = i915_vm_pin_pt_stash(vm, &stash); + if (err) { + i915_vm_free_pt_stash(vm, &stash); + goto end; + } + + vm->allocate_va_range(vm, &stash, offset, chunk_size); + + i915_vm_free_pt_stash(vm, &stash); + /* Prime the TLB with the dummy pages */ for (i = 0; i < count; i++) { vma->node.start = offset + i * PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index c2d001d9c0ec..debbac660519 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c 
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg) } /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ - scratch = kmap(ce->vm->scratch[0].base.page); + scratch = kmap(__px_page(ce->vm->scratch[0])); memset(scratch, POISON_FREE, PAGE_SIZE); rq = intel_context_create_request(ce); @@ -405,7 +405,7 @@ static int live_noa_gpr(void *arg) out_rq: i915_request_put(rq); out_ce: - kunmap(ce->vm->scratch[0].base.page); + kunmap(__px_page(ce->vm->scratch[0])); intel_context_put(ce); out: stream_destroy(stream); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 57dd6f5122ee..64bbb8288249 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -331,7 +331,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) if (!wait) { i915_sw_fence_commit(submit); heap_fence_put(submit); - err = ENOMEM; + err = -ENOMEM; break; } @@ -527,7 +527,7 @@ int i915_request_mock_selftests(void) with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -862,6 +862,8 @@ static int live_all_engines(void *arg) goto out_free; } + i915_vma_lock(batch); + idx = 0; for_each_uabi_engine(engine, i915) { request[idx] = intel_engine_create_kernel_request(engine); @@ -872,11 +874,9 @@ static int live_all_engines(void *arg) goto out_request; } - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) err = i915_vma_move_to_active(batch, request[idx], 0); - i915_vma_unlock(batch); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -891,6 +891,8 @@ static int live_all_engines(void *arg) idx++; } + i915_vma_unlock(batch); + idx = 0; for_each_uabi_engine(engine, i915) { if (i915_request_completed(request[idx])) { @@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg) goto out_free; } + i915_vma_lock(batch); request[idx] = intel_engine_create_kernel_request(engine); if (IS_ERR(request[idx])) { err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed for %s with err=%d\n", __func__, engine->name, err); - goto out_request; + goto out_unlock; } if (prev) { @@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg) i915_request_add(request[idx]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); - goto out_request; + goto out_unlock; } } - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, request[idx], 0); - i915_vma_unlock(batch); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg) prev = request[idx]; idx++; + +out_unlock: + i915_vma_unlock(batch); + if (err) + goto out_request; } idx = 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index af89c7fc8f59..1b6125e4c1ac 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -841,7 +841,7 @@ int i915_vma_mock_selftests(void) mock_fini_ggtt(ggtt); kfree(ggtt); out_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg) unsigned int x, y; int err; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = 
i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 6e80d99048e4..334b0648e253 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg) if (err) goto out_unpin; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) @@ -522,9 +522,9 @@ static int igt_lmem_write_cpu(void *arg) goto out_unpin; } - /* We want to throw in a random width/align */ - bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), - sizeof(u32)); + /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */ + bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32)); + GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32))); i = 0; do { @@ -791,7 +791,7 @@ int intel_memory_region_mock_selftests(void) intel_memory_region_put(mem); out_unref: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 397c313a8b69..b6c42fd872ad 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -79,8 +79,6 @@ static void mock_device_release(struct drm_device *dev) out: i915_params_free(&i915->params); - put_device(&i915->drm.pdev->dev); - i915->drm.pdev = NULL; } static struct drm_driver mock_driver = { @@ -123,17 +121,10 @@ struct drm_i915_private *mock_gem_device(void) #endif struct drm_i915_private *i915; struct pci_dev *pdev; - int err; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); if (!pdev) return NULL; - i915 = kzalloc(sizeof(*i915), GFP_KERNEL); - if (!i915) { - kfree(pdev); - return NULL; - } - device_initialize(&pdev->dev); pdev->class = PCI_BASE_CLASS_DISPLAY << 16; pdev->dev.release = release_dev; @@ -144,8 +135,23 @@ struct drm_i915_private *mock_gem_device(void) /* HACK to disable iommu for the fake device; force identity mapping */ pdev->dev.iommu = &fake_iommu; #endif + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + put_device(&pdev->dev); + return NULL; + } + + i915 = devm_drm_dev_alloc(&pdev->dev, &mock_driver, + struct drm_i915_private, drm); + if (IS_ERR(i915)) { + pr_err("Failed to allocate mock GEM device: err=%ld\n", PTR_ERR(i915)); + devres_release_group(&pdev->dev, NULL); + put_device(&pdev->dev); + + return NULL; + } pci_set_drvdata(pdev, i915); + i915->drm.pdev = pdev; dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); @@ -153,16 +159,6 @@ struct drm_i915_private *mock_gem_device(void) if (pm_runtime_enabled(&pdev->dev)) WARN_ON(pm_runtime_get_sync(&pdev->dev)); - err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); - if (err) { - pr_err("Failed to initialise mock GEM device: err=%d\n", err); - put_device(&pdev->dev); - kfree(i915); - - return NULL; - } - i915->drm.pdev = pdev; - drmm_add_final_kfree(&i915->drm, i915); i915_params_copy(&i915->params, &i915_modparams); @@ -222,7 +218,15 @@ err_drv: intel_gt_driver_late_release(&i915->gt); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return NULL; } + +void mock_destroy_device(struct drm_i915_private *i915) +{ + struct device *dev = i915->drm.dev; + 
+ devres_release_group(dev, NULL); + put_device(dev); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h index b5dc4e394555..953cfe4fab34 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -7,4 +7,6 @@ struct drm_i915_private; struct drm_i915_private *mock_gem_device(void); void mock_device_flush(struct drm_i915_private *i915); +void mock_destroy_device(struct drm_i915_private *i915); + #endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index b173086411ef..7270fc8ca801 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -38,14 +38,14 @@ static void mock_insert_entries(struct i915_address_space *vm, { } -static int mock_bind_ppgtt(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void mock_bind_ppgtt(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); - return 0; } static void mock_unbind_ppgtt(struct i915_address_space *vm, @@ -74,9 +74,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); + ppgtt->vm.dma = &i915->drm.pdev->dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.clear_range = mock_clear_range; ppgtt->vm.insert_page = mock_insert_page; ppgtt->vm.insert_entries = mock_insert_entries; @@ -90,13 +93,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) return ppgtt; } -static int mock_bind_ggtt(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void mock_bind_ggtt(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); - return 0; } static void mock_unbind_ggtt(struct i915_address_space *vm, @@ -116,6 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->vm.total = 4096 * PAGE_SIZE; + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.clear_range = mock_clear_range; ggtt->vm.insert_page = mock_insert_page; ggtt->vm.insert_entries = mock_insert_entries; |
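
A note on the i915_gem_ww_ctx pattern that recurs throughout the hunks above (oa_pin_context(), emit_oa_config(), igt_gem_ww_ctx(), i915_vma_pin_ww()): object locks are now taken through a ww acquire context, and on -EDEADLK the caller backs off and retries instead of risking a lock-order inversion. The sketch below only rearranges calls that already appear in this diff; pin_and_use_vma() is an illustrative name, not a function added by the patch, and error handling is condensed.

	/* Illustrative only: mirrors the retry loop in emit_oa_config() above */
	static int pin_and_use_vma(struct i915_vma *vma)
	{
		struct i915_gem_ww_ctx ww;
		int err;

		i915_gem_ww_ctx_init(&ww, true);	/* true: interruptible waits */
	retry:
		/* Lock the backing object under the ww acquire context */
		err = i915_gem_object_lock(vma->obj, &ww);
		if (err)
			goto out;

		/* Pin while the ww context is held; may fail with -EDEADLK on contention */
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
		if (err)
			goto out;

		/* ... build and submit a request that uses the pinned vma ... */

		i915_vma_unpin(vma);
	out:
		if (err == -EDEADLK) {
			/* Drop every lock held by this ww context, then retry */
			err = i915_gem_ww_ctx_backoff(&ww);
			if (!err)
				goto retry;
		}
		i915_gem_ww_ctx_fini(&ww);
		return err;
	}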
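
Similarly, the selftest hunks above (igt_ppgtt_alloc(), lowlevel_hole(), igt_cs_tlb()) show the reworked two-phase page-table allocation: allocate_va_range() no longer returns an error, so page directories are allocated and pinned up front into an i915_vm_pt_stash and the leftovers are freed afterwards. A condensed sketch of that sequence, using only calls visible in this diff; prealloc_va_range() is an illustrative name, not an API introduced by the patch.

	/* Illustrative only: follows the stash handling in the gtt selftests above */
	static int prealloc_va_range(struct i915_address_space *vm,
				     u64 addr, u64 size)
	{
		struct i915_vm_pt_stash stash = {};
		int err;

		/* Reserve enough page-directory structures for the whole range */
		err = i915_vm_alloc_pt_stash(vm, &stash, size);
		if (err)
			return err;

		/* Back the stash with pinned pages; the last point that can fail */
		err = i915_vm_pin_pt_stash(vm, &stash);
		if (err)
			goto out;

		/* Infallible from here: directories are consumed from the stash */
		vm->allocate_va_range(vm, &stash, addr, size);
	out:
		/* Release whatever the bind did not take from the stash */
		i915_vm_free_pt_stash(vm, &stash);
		return err;
	}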