Diffstat (limited to 'drivers/gpu/drm/i915/gvt')
26 files changed, 1254 insertions(+), 453 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 7c9ec4f4f36c..fe754022e356 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,7 +61,7 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) } mutex_lock(&dev_priv->drm.struct_mutex); - ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node, + ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); @@ -131,7 +131,7 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu, assert_rpm_wakelock_held(dev_priv); - if (WARN_ON(fence > vgpu_fence_sz(vgpu))) + if (WARN_ON(fence >= vgpu_fence_sz(vgpu))) return; reg = vgpu->fence.regs[fence]; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7f562410f9cf..a614db310ea2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -172,6 +172,7 @@ struct decode_info { #define OP_MEDIA_INTERFACE_DESCRIPTOR_LOAD OP_3D_MEDIA(0x2, 0x0, 0x2) #define OP_MEDIA_GATEWAY_STATE OP_3D_MEDIA(0x2, 0x0, 0x3) #define OP_MEDIA_STATE_FLUSH OP_3D_MEDIA(0x2, 0x0, 0x4) +#define OP_MEDIA_POOL_STATE OP_3D_MEDIA(0x2, 0x0, 0x5) #define OP_MEDIA_OBJECT OP_3D_MEDIA(0x2, 0x1, 0x0) #define OP_MEDIA_OBJECT_PRT OP_3D_MEDIA(0x2, 0x1, 0x2) @@ -873,7 +874,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); - return 0; + return -EBADRQC; } if (is_shadowed_mmio(offset)) { @@ -1279,7 +1280,9 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) { stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; @@ -1307,7 +1310,9 @@ static int gen8_update_plane_mmio_from_mi_display_flip( set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) { set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), @@ -1331,7 +1336,9 @@ static int decode_mi_display_flip(struct parser_exec_state *s, if (IS_BROADWELL(dev_priv)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) return skl_decode_mi_display_flip(s, info); return -ENODEV; @@ -1340,26 +1347,14 @@ static int decode_mi_display_flip(struct parser_exec_state *s, static int check_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - - if (IS_BROADWELL(dev_priv) - || IS_SKYLAKE(dev_priv) - || IS_KABYLAKE(dev_priv)) - return gen8_check_mi_display_flip(s, info); - return -ENODEV; + return gen8_check_mi_display_flip(s, info); } static int update_plane_mmio_from_mi_display_flip( struct parser_exec_state *s, struct mi_display_flip_command_info *info) { - struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - - if (IS_BROADWELL(dev_priv) - || IS_SKYLAKE(dev_priv) - || 
IS_KABYLAKE(dev_priv)) - return gen8_update_plane_mmio_from_mi_display_flip(s, info); - return -ENODEV; + return gen8_update_plane_mmio_from_mi_display_flip(s, info); } static int cmd_handler_mi_display_flip(struct parser_exec_state *s) @@ -1638,15 +1633,10 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, */ static int batch_buffer_needs_scan(struct parser_exec_state *s) { - struct intel_gvt *gvt = s->vgpu->gvt; - - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv)) { - /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8) && + /* Decide privilege based on address space */ + if (cmd_val(s, 0) & (1 << 8) && !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) - return 0; - } + return 0; return 1; } @@ -2372,6 +2362,9 @@ static struct cmd_info cmd_info[] = { {"MEDIA_STATE_FLUSH", OP_MEDIA_STATE_FLUSH, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL}, + {"MEDIA_POOL_STATE", OP_MEDIA_POOL_STATE, F_LEN_VAR, R_RCS, D_ALL, + 0, 16, NULL}, + {"MEDIA_OBJECT", OP_MEDIA_OBJECT, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL}, {"MEDIA_CURBE_LOAD", OP_MEDIA_CURBE_LOAD, F_LEN_VAR, R_RCS, D_ALL, diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 4b072ade8c38..3019dbc39aef 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -171,6 +171,29 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int pipe; + if (IS_BROXTON(dev_priv)) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~(BXT_DE_PORT_HP_DDIA | + BXT_DE_PORT_HP_DDIB | + BXT_DE_PORT_HP_DDIC); + + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + BXT_DE_PORT_HP_DDIA; + } + + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + BXT_DE_PORT_HP_DDIB; + } + + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + BXT_DE_PORT_HP_DDIC; + } + + return; + } + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); @@ -273,8 +296,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) for_each_pipe(dev_priv, pipe) { vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE; vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; - vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~CURSOR_MODE; - vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= CURSOR_MODE_DISABLE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~MCURSOR_MODE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= MCURSOR_MODE_DISABLE; } vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; @@ -337,26 +360,28 @@ void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt) struct intel_gvt_irq *irq = &gvt->irq; struct intel_vgpu *vgpu; int pipe, id; + int found = false; - if (WARN_ON(!mutex_is_locked(&gvt->lock))) - return; - + mutex_lock(&gvt->lock); for_each_active_vgpu(gvt, vgpu, id) { for (pipe = 0; pipe < I915_MAX_PIPES; pipe++) { - if (pipe_is_enabled(vgpu, pipe)) - goto out; + if (pipe_is_enabled(vgpu, pipe)) { + found = true; + break; + } } + if (found) + break; } /* all the pipes are disabled */ - hrtimer_cancel(&irq->vblank_timer.timer); - return; - -out: - hrtimer_start(&irq->vblank_timer.timer, - ktime_add_ns(ktime_get(), irq->vblank_timer.period), - HRTIMER_MODE_ABS); - + if (!found) + hrtimer_cancel(&irq->vblank_timer.timer); + else + hrtimer_start(&irq->vblank_timer.timer, + ktime_add_ns(ktime_get(), irq->vblank_timer.period), + 
HRTIMER_MODE_ABS); + mutex_unlock(&gvt->lock); } static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) @@ -393,8 +418,10 @@ static void emulate_vblank(struct intel_vgpu *vgpu) { int pipe; + mutex_lock(&vgpu->vgpu_lock); for_each_pipe(vgpu->gvt->dev_priv, pipe) emulate_vblank_on_pipe(vgpu, pipe); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -409,11 +436,10 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) struct intel_vgpu *vgpu; int id; - if (WARN_ON(!mutex_is_locked(&gvt->lock))) - return; - + mutex_lock(&gvt->lock); for_each_active_vgpu(gvt, vgpu, id) emulate_vblank(vgpu); + mutex_unlock(&gvt->lock); } /** diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 6f4f8e941fc2..6e3f56684f4e 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -164,7 +164,9 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, obj->read_domains = I915_GEM_DOMAIN_GTT; obj->write_domain = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) { unsigned int tiling_mode = 0; unsigned int stride = 0; @@ -192,6 +194,14 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, return obj; } +static bool validate_hotspot(struct intel_vgpu_cursor_plane_format *c) +{ + if (c && c->x_hot <= c->width && c->y_hot <= c->height) + return true; + else + return false; +} + static int vgpu_get_plane_info(struct drm_device *dev, struct intel_vgpu *vgpu, struct intel_vgpu_fb_info *info, @@ -229,12 +239,14 @@ static int vgpu_get_plane_info(struct drm_device *dev, info->x_pos = c.x_pos; info->y_pos = c.y_pos; - /* The invalid cursor hotspot value is delivered to host - * until we find a way to get the cursor hotspot info of - * guest OS. 
- */ - info->x_hot = UINT_MAX; - info->y_hot = UINT_MAX; + if (validate_hotspot(&c)) { + info->x_hot = c.x_hot; + info->y_hot = c.y_hot; + } else { + info->x_hot = UINT_MAX; + info->y_hot = UINT_MAX; + } + info->size = (((info->stride * c.height * c.bpp) / 8) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else { diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index f61337632969..4b98539025c5 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -77,6 +77,20 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) return chr; } +static inline int bxt_get_port_from_gmbus0(u32 gmbus0) +{ + int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; + int port = -EINVAL; + + if (port_select == 1) + port = PORT_B; + else if (port_select == 2) + port = PORT_C; + else if (port_select == 3) + port = PORT_D; + return port; +} + static inline int get_port_from_gmbus0(u32 gmbus0) { int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK; @@ -105,6 +119,7 @@ static void reset_gmbus_controller(struct intel_vgpu *vgpu) static int gmbus0_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int port, pin_select; memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes); @@ -116,7 +131,10 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, if (pin_select == 0) return 0; - port = get_port_from_gmbus0(pin_select); + if (IS_BROXTON(dev_priv)) + port = bxt_get_port_from_gmbus0(pin_select); + else + port = get_port_from_gmbus0(pin_select); if (WARN_ON(port < 0)) return 0; diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h index 427e40e64d41..714d709829a2 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.h +++ b/drivers/gpu/drm/i915/gvt/execlist.h @@ -146,14 +146,11 @@ struct execlist_ring_context { u32 nop4; u32 lri_cmd_2; struct execlist_mmio_pair ctx_timestamp; - struct execlist_mmio_pair pdp3_UDW; - struct execlist_mmio_pair pdp3_LDW; - struct execlist_mmio_pair pdp2_UDW; - struct execlist_mmio_pair pdp2_LDW; - struct execlist_mmio_pair pdp1_UDW; - struct execlist_mmio_pair pdp1_LDW; - struct execlist_mmio_pair pdp0_UDW; - struct execlist_mmio_pair pdp0_LDW; + /* + * pdps[8]={ pdp3_UDW, pdp3_LDW, pdp2_UDW, pdp2_LDW, + * pdp1_UDW, pdp1_LDW, pdp0_UDW, pdp0_LDW} + */ + struct execlist_mmio_pair pdps[8]; }; struct intel_vgpu_elsp_dwords { diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 1c120683e958..face664be3e8 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -36,6 +36,7 @@ #include <uapi/drm/drm_fourcc.h> #include "i915_drv.h" #include "gvt.h" +#include "i915_pvinfo.h" #define PRIMARY_FORMAT_NUM 16 struct pixel_format { @@ -150,7 +151,9 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; u32 stride = stride_reg; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) { switch (tiled) { case PLANE_CTL_TILED_LINEAR: stride = stride_reg * 64; @@ -214,7 +217,9 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (!plane->enabled) return -ENODEV; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) { plane->tiled = (val & PLANE_CTL_TILED_MASK) >> _PLANE_CTL_TILED_SHIFT; fmt = skl_format_to_drm( @@ 
-256,7 +261,9 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, } plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) ? + (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) ? (_PRI_PLANE_STRIDE_MASK >> 6) : _PRI_PLANE_STRIDE_MASK, plane->bpp); @@ -300,16 +307,16 @@ static int cursor_mode_to_drm(int mode) int cursor_pixel_formats_index = 4; switch (mode) { - case CURSOR_MODE_128_ARGB_AX: + case MCURSOR_MODE_128_ARGB_AX: cursor_pixel_formats_index = 0; break; - case CURSOR_MODE_256_ARGB_AX: + case MCURSOR_MODE_256_ARGB_AX: cursor_pixel_formats_index = 1; break; - case CURSOR_MODE_64_ARGB_AX: + case MCURSOR_MODE_64_ARGB_AX: cursor_pixel_formats_index = 2; break; - case CURSOR_MODE_64_32B_AX: + case MCURSOR_MODE_64_32B_AX: cursor_pixel_formats_index = 3; break; @@ -342,8 +349,8 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, return -ENODEV; val = vgpu_vreg_t(vgpu, CURCNTR(pipe)); - mode = val & CURSOR_MODE; - plane->enabled = (mode != CURSOR_MODE_DISABLE); + mode = val & MCURSOR_MODE; + plane->enabled = (mode != MCURSOR_MODE_DISABLE); if (!plane->enabled) return -ENODEV; @@ -384,6 +391,8 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; plane->y_sign = (val & _CURSOR_SIGN_Y_MASK) >> _CURSOR_SIGN_Y_SHIFT; + plane->x_hot = vgpu_vreg_t(vgpu, vgtif_reg(cursor_x_hot)); + plane->y_hot = vgpu_vreg_t(vgpu, vgtif_reg(cursor_y_hot)); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index a73e1d418c22..4ac18b447247 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -162,7 +162,7 @@ static int verify_firmware(struct intel_gvt *gvt, h = (struct gvt_firmware_header *)fw->data; - crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4; + crc32_start = offsetofend(struct gvt_firmware_header, crc32); mem = fw->data + crc32_start; #define VERIFY(s, a, b) do { \ diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 4efec8fa6c1d..00aad8164dec 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -216,16 +216,22 @@ static struct gtt_type_table_entry gtt_type_table[] = { GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), + /* We take IPS bit as 'PSE' for PTE level. 
*/ GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, - GTT_TYPE_INVALID), + GTT_TYPE_PPGTT_PTE_64K_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, - GTT_TYPE_INVALID), + GTT_TYPE_PPGTT_PTE_64K_ENTRY), + GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, + GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, + GTT_TYPE_INVALID, + GTT_TYPE_PPGTT_PTE_64K_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, GTT_TYPE_PPGTT_PDE_PT, @@ -339,8 +345,14 @@ static inline int gtt_set_entry64(void *pt, #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) +#define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) +#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) +#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ + +#define GTT_64K_PTE_STRIDE 16 + static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { unsigned long pfn; @@ -349,6 +361,8 @@ static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; + else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) + pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; else pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; return pfn; @@ -362,6 +376,9 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { e->val64 &= ~ADDR_2M_MASK; pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); + } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { + e->val64 &= ~ADDR_64K_MASK; + pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); } else { e->val64 &= ~ADDR_4K_MASK; pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); @@ -372,16 +389,41 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) { - /* Entry doesn't have PSE bit. 
*/ - if (get_pse_type(e->type) == GTT_TYPE_INVALID) - return false; + return !!(e->val64 & _PAGE_PSE); +} - e->type = get_entry_type(e->type); - if (!(e->val64 & _PAGE_PSE)) +static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) +{ + if (gen8_gtt_test_pse(e)) { + switch (e->type) { + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + e->val64 &= ~_PAGE_PSE; + e->type = GTT_TYPE_PPGTT_PDE_ENTRY; + break; + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + e->type = GTT_TYPE_PPGTT_PDP_ENTRY; + e->val64 &= ~_PAGE_PSE; + break; + default: + WARN_ON(1); + } + } +} + +static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) +{ + if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) return false; - e->type = get_pse_type(e->type); - return true; + return !!(e->val64 & GEN8_PDE_IPS_64K); +} + +static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) +{ + if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) + return; + + e->val64 &= ~GEN8_PDE_IPS_64K; } static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) @@ -408,6 +450,21 @@ static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) e->val64 |= _PAGE_PRESENT; } +static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) +{ + return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); +} + +static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) +{ + e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; +} + +static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) +{ + e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; +} + /* * Per-platform GMA routines. */ @@ -440,6 +497,12 @@ static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { .set_present = gtt_entry_set_present, .test_present = gen8_gtt_test_present, .test_pse = gen8_gtt_test_pse, + .clear_pse = gen8_gtt_clear_pse, + .clear_ips = gen8_gtt_clear_ips, + .test_ips = gen8_gtt_test_ips, + .clear_64k_splited = gen8_gtt_clear_64k_splited, + .set_64k_splited = gen8_gtt_set_64k_splited, + .test_64k_splited = gen8_gtt_test_64k_splited, .get_pfn = gen8_gtt_get_pfn, .set_pfn = gen8_gtt_set_pfn, }; @@ -453,6 +516,27 @@ static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { .gma_to_pml4_index = gen8_gma_to_pml4_index, }; +/* Update entry type per pse and ips bit. */ +static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops, + struct intel_gvt_gtt_entry *entry, bool ips) +{ + switch (entry->type) { + case GTT_TYPE_PPGTT_PDE_ENTRY: + case GTT_TYPE_PPGTT_PDP_ENTRY: + if (pte_ops->test_pse(entry)) + entry->type = get_pse_type(entry->type); + break; + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + if (ips) + entry->type = get_pse_type(entry->type); + break; + default: + GEM_BUG_ON(!gtt_type_is_entry(entry->type)); + } + + GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); +} + /* * MM helpers. */ @@ -468,8 +552,7 @@ static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : mm->ppgtt_mm.shadow_pdps, entry, index, false, 0, mm->vgpu); - - pte_ops->test_pse(entry); + update_entry_type_for_real(pte_ops, entry, false); } static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, @@ -574,7 +657,8 @@ static inline int ppgtt_spt_get_entry( if (ret) return ret; - ops->test_pse(e); + update_entry_type_for_real(ops, e, guest ? 
+ spt->guest_page.pde_ips : false); gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", type, e->type, index, e->val64); @@ -653,10 +737,12 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); - if (spt->guest_page.oos_page) - detach_oos_page(spt->vgpu, spt->guest_page.oos_page); + if (spt->guest_page.gfn) { + if (spt->guest_page.oos_page) + detach_oos_page(spt->vgpu, spt->guest_page.oos_page); - intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); + intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); + } list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -717,8 +803,9 @@ static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); +/* Allocate shadow page table without guest page. */ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( - struct intel_vgpu *vgpu, int type, unsigned long gfn) + struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; @@ -753,26 +840,12 @@ retry: spt->shadow_page.vaddr = page_address(spt->shadow_page.page); spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; - /* - * Init guest_page. - */ - spt->guest_page.type = type; - spt->guest_page.gfn = gfn; - - ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, - ppgtt_write_protection_handler, spt); - if (ret) - goto err_unmap_dma; - ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); if (ret) - goto err_unreg_page_track; + goto err_unmap_dma; - trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); return spt; -err_unreg_page_track: - intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn); err_unmap_dma: dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); err_free_spt: @@ -780,6 +853,37 @@ err_free_spt: return ERR_PTR(ret); } +/* Allocate shadow page table associated with specific gfn. */ +static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( + struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type, + unsigned long gfn, bool guest_pde_ips) +{ + struct intel_vgpu_ppgtt_spt *spt; + int ret; + + spt = ppgtt_alloc_spt(vgpu, type); + if (IS_ERR(spt)) + return spt; + + /* + * Init guest_page. + */ + ret = intel_vgpu_register_page_track(vgpu, gfn, + ppgtt_write_protection_handler, spt); + if (ret) { + ppgtt_free_spt(spt); + return ERR_PTR(ret); + } + + spt->guest_page.type = type; + spt->guest_page.gfn = gfn; + spt->guest_page.pde_ips = guest_pde_ips; + + trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); + + return spt; +} + #define pt_entry_size_shift(spt) \ ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) @@ -787,24 +891,38 @@ err_free_spt: (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) #define for_each_present_guest_entry(spt, e, i) \ - for (i = 0; i < pt_entries(spt); i++) \ + for (i = 0; i < pt_entries(spt); \ + i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ if (!ppgtt_get_guest_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) #define for_each_present_shadow_entry(spt, e, i) \ - for (i = 0; i < pt_entries(spt); i++) \ + for (i = 0; i < pt_entries(spt); \ + i += spt->shadow_page.pde_ips ? 
GTT_64K_PTE_STRIDE : 1) \ if (!ppgtt_get_shadow_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) -static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) +#define for_each_shadow_entry(spt, e, i) \ + for (i = 0; i < pt_entries(spt); \ + i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ + if (!ppgtt_get_shadow_entry(spt, e, i)) + +static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) { int v = atomic_read(&spt->refcount); trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); - atomic_inc(&spt->refcount); } +static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) +{ + int v = atomic_read(&spt->refcount); + + trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); + return atomic_dec_return(&spt->refcount); +} + static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, @@ -843,7 +961,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, pfn = ops->get_pfn(entry); type = spt->shadow_page.type; - if (pfn == vgpu->gtt.scratch_pt[type].page_mfn) + /* Uninitialized spte or unshadowed spte. */ + if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) return; intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); @@ -855,14 +974,11 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) struct intel_gvt_gtt_entry e; unsigned long index; int ret; - int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, spt->guest_page.gfn, spt->shadow_page.type); - trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); - - if (atomic_dec_return(&spt->refcount) > 0) + if (ppgtt_put_spt(spt) > 0) return 0; for_each_present_shadow_entry(spt, &e, index) { @@ -871,9 +987,15 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) gvt_vdbg_mm("invalidate 4K entry\n"); ppgtt_invalidate_pte(spt, &e); break; + case GTT_TYPE_PPGTT_PTE_64K_ENTRY: + /* We don't setup 64K shadow entry so far. */ + WARN(1, "suspicious 64K gtt entry\n"); + continue; case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + gvt_vdbg_mm("invalidate 2M entry\n"); + continue; case GTT_TYPE_PPGTT_PTE_1G_ENTRY: - WARN(1, "GVT doesn't support 2M/1GB page\n"); + WARN(1, "GVT doesn't support 1GB page\n"); continue; case GTT_TYPE_PPGTT_PML4_ENTRY: case GTT_TYPE_PPGTT_PDP_ENTRY: @@ -899,6 +1021,22 @@ fail: return ret; } +static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) { + u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & + GAMW_ECO_ENABLE_64K_IPS_FIELD; + + return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; + } else if (INTEL_GEN(dev_priv) >= 11) { + /* 64K paging only controlled by IPS bit in PTE now. 
*/ + return true; + } else + return false; +} + static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( @@ -906,35 +1044,54 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *spt = NULL; + bool ips = false; int ret; GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); + if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) + ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); + spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); - if (spt) + if (spt) { ppgtt_get_spt(spt); - else { + + if (ips != spt->guest_page.pde_ips) { + spt->guest_page.pde_ips = ips; + + gvt_dbg_mm("reshadow PDE since ips changed\n"); + clear_page(spt->shadow_page.vaddr); + ret = ppgtt_populate_spt(spt); + if (ret) { + ppgtt_put_spt(spt); + goto err; + } + } + } else { int type = get_next_pt_type(we->type); - spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we)); + spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); if (IS_ERR(spt)) { ret = PTR_ERR(spt); - goto fail; + goto err; } ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); if (ret) - goto fail; + goto err_free_spt; ret = ppgtt_populate_spt(spt); if (ret) - goto fail; + goto err_free_spt; trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, spt->shadow_page.type); } return spt; -fail: + +err_free_spt: + ppgtt_free_spt(spt); +err: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", spt, we->val64, we->type); return ERR_PTR(ret); @@ -948,16 +1105,118 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, se->type = ge->type; se->val64 = ge->val64; + /* Because we always split 64KB pages, so clear IPS in shadow PDE. */ + if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) + ops->clear_ips(se); + ops->set_pfn(se, s->shadow_page.mfn); } +/** + * Return 1 if 2MB huge gtt shadowing is possilbe, 0 if miscondition, + * negtive if found err. + */ +static int is_2MB_gtt_possible(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + + if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M)) + return 0; + + pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry)); + if (pfn == INTEL_GVT_INVALID_ADDR) + return -EINVAL; + + return PageTransHuge(pfn_to_page(pfn)); +} + +static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *se) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_vgpu_ppgtt_spt *sub_spt; + struct intel_gvt_gtt_entry sub_se; + unsigned long start_gfn; + dma_addr_t dma_addr; + unsigned long sub_index; + int ret; + + gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index); + + start_gfn = ops->get_pfn(se); + + sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT); + if (IS_ERR(sub_spt)) + return PTR_ERR(sub_spt); + + for_each_shadow_entry(sub_spt, &sub_se, sub_index) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, + start_gfn + sub_index, PAGE_SIZE, &dma_addr); + if (ret) { + ppgtt_invalidate_spt(spt); + return ret; + } + sub_se.val64 = se->val64; + + /* Copy the PAT field from PDE. 
*/ + sub_se.val64 &= ~_PAGE_PAT; + sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; + + ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); + ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); + } + + /* Clear dirty field. */ + se->val64 &= ~_PAGE_DIRTY; + + ops->clear_pse(se); + ops->clear_ips(se); + ops->set_pfn(se, sub_spt->shadow_page.mfn); + ppgtt_set_shadow_entry(spt, se, index); + return 0; +} + +static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *se) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry entry = *se; + unsigned long start_gfn; + dma_addr_t dma_addr; + int i, ret; + + gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index); + + GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); + + start_gfn = ops->get_pfn(se); + + entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; + ops->set_64k_splited(&entry); + + for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, + start_gfn + i, PAGE_SIZE, &dma_addr); + if (ret) + return ret; + + ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); + ppgtt_set_shadow_entry(spt, &entry, index + i); + } + return 0; +} + static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, struct intel_vgpu_ppgtt_spt *spt, unsigned long index, struct intel_gvt_gtt_entry *ge) { struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry se = *ge; - unsigned long gfn; + unsigned long gfn, page_size = PAGE_SIZE; dma_addr_t dma_addr; int ret; @@ -970,16 +1229,33 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, case GTT_TYPE_PPGTT_PTE_4K_ENTRY: gvt_vdbg_mm("shadow 4K gtt entry\n"); break; + case GTT_TYPE_PPGTT_PTE_64K_ENTRY: + gvt_vdbg_mm("shadow 64K gtt entry\n"); + /* + * The layout of 64K page is special, the page size is + * controlled by uper PDE. To be simple, we always split + * 64K page to smaller 4K pages in shadow PT. + */ + return split_64KB_gtt_entry(vgpu, spt, index, &se); case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + gvt_vdbg_mm("shadow 2M gtt entry\n"); + ret = is_2MB_gtt_possible(vgpu, ge); + if (ret == 0) + return split_2MB_gtt_entry(vgpu, spt, index, &se); + else if (ret < 0) + return ret; + page_size = I915_GTT_PAGE_SIZE_2M; + break; case GTT_TYPE_PPGTT_PTE_1G_ENTRY: - gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n"); + gvt_vgpu_err("GVT doesn't support 1GB entry\n"); return -EINVAL; default: GEM_BUG_ON(1); }; /* direct shadow */ - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size, + &dma_addr); if (ret) return -ENXIO; @@ -1062,8 +1338,12 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, ret = ppgtt_invalidate_spt(s); if (ret) goto fail; - } else + } else { + /* We don't setup 64K shadow entry so far. */ + WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, + "suspicious 64K entry\n"); ppgtt_invalidate_pte(spt, se); + } return 0; fail: @@ -1286,7 +1566,7 @@ static int ppgtt_handle_guest_write_page_table( struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry old_se; int new_present; - int ret; + int i, ret; new_present = ops->test_present(we); @@ -1308,8 +1588,27 @@ static int ppgtt_handle_guest_write_page_table( goto fail; if (!new_present) { - ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn); - ppgtt_set_shadow_entry(spt, &old_se, index); + /* For 64KB splited entries, we need clear them all. 
*/ + if (ops->test_64k_splited(&old_se) && + !(index % GTT_64K_PTE_STRIDE)) { + gvt_vdbg_mm("remove splited 64K shadow entries\n"); + for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { + ops->clear_64k_splited(&old_se); + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index + i); + } + } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || + old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { + ops->clear_pse(&old_se); + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); + } else { + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); + } } return 0; @@ -1391,7 +1690,17 @@ static int ppgtt_handle_guest_write_page_table_bytes( ppgtt_get_guest_entry(spt, &we, index); - ops->test_pse(&we); + /* + * For page table which has 64K gtt entry, only PTE#0, PTE#16, + * PTE#32, ... PTE#496 are used. Unused PTEs update should be + * ignored. + */ + if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && + (index % GTT_64K_PTE_STRIDE)) { + gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", + index); + return 0; + } if (bytes == info->gtt_entry_size) { ret = ppgtt_handle_guest_write_page_table(spt, &we, index); @@ -1939,7 +2248,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, - &dma_addr); + PAGE_SIZE, &dma_addr); if (ret) { gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial @@ -2031,7 +2340,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. 
*/ - if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { + if (type > GTT_TYPE_PPGTT_PTE_PT) { struct intel_gvt_gtt_entry se; memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); @@ -2315,13 +2624,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) gvt_dbg_core("init gtt\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv)) { - gvt->gtt.pte_ops = &gen8_gtt_pte_ops; - gvt->gtt.gma_ops = &gen8_gtt_gma_ops; - } else { - return -ENODEV; - } + gvt->gtt.pte_ops = &gen8_gtt_pte_ops; + gvt->gtt.gma_ops = &gen8_gtt_gma_ops; page = (void *)get_zeroed_page(GFP_KERNEL); if (!page) { diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 97e62647418a..7a9b36176efb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -63,6 +63,12 @@ struct intel_gvt_gtt_pte_ops { void (*clear_present)(struct intel_gvt_gtt_entry *e); void (*set_present)(struct intel_gvt_gtt_entry *e); bool (*test_pse)(struct intel_gvt_gtt_entry *e); + void (*clear_pse)(struct intel_gvt_gtt_entry *e); + bool (*test_ips)(struct intel_gvt_gtt_entry *e); + void (*clear_ips)(struct intel_gvt_gtt_entry *e); + bool (*test_64k_splited)(struct intel_gvt_gtt_entry *e); + void (*clear_64k_splited)(struct intel_gvt_gtt_entry *e); + void (*set_64k_splited)(struct intel_gvt_gtt_entry *e); void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn); unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e); }; @@ -95,6 +101,7 @@ typedef enum { GTT_TYPE_GGTT_PTE, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_64K_ENTRY, GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PTE_1G_ENTRY, @@ -222,6 +229,7 @@ struct intel_vgpu_ppgtt_spt { struct { intel_gvt_gtt_type_t type; + bool pde_ips; /* for 64KB PTEs */ void *vaddr; struct page *page; unsigned long mfn; @@ -229,6 +237,7 @@ struct intel_vgpu_ppgtt_spt { struct { intel_gvt_gtt_type_t type; + bool pde_ips; /* for 64KB PTEs */ unsigned long gfn; unsigned long write_cnt; struct intel_vgpu_oos_page *oos_page; diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 61bd14fcb649..46c8b720e336 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -176,6 +176,7 @@ static const struct intel_gvt_ops intel_gvt_ops = { .emulate_mmio_write = intel_vgpu_emulate_mmio_write, .vgpu_create = intel_gvt_create_vgpu, .vgpu_destroy = intel_gvt_destroy_vgpu, + .vgpu_release = intel_gvt_release_vgpu, .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, @@ -238,18 +239,15 @@ static void init_device_info(struct intel_gvt *gvt) struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv)) { - info->max_support_vgpus = 8; - info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; - info->mmio_size = 2 * 1024 * 1024; - info->mmio_bar = 0; - info->gtt_start_offset = 8 * 1024 * 1024; - info->gtt_entry_size = 8; - info->gtt_entry_size_shift = 3; - info->gmadr_bytes_in_cmd = 8; - info->max_surface_size = 36 * 1024 * 1024; - } + info->max_support_vgpus = 8; + info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; + info->mmio_size = 2 * 1024 * 1024; + info->mmio_bar = 0; + info->gtt_start_offset = 8 * 1024 * 1024; + info->gtt_entry_size = 8; + info->gtt_entry_size_shift = 3; + info->gmadr_bytes_in_cmd = 8; + info->max_surface_size = 36 * 1024 * 1024; info->msi_cap_offset = 
pdev->msi_cap; } @@ -271,11 +269,8 @@ static int gvt_service_thread(void *data) continue; if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK, - (void *)&gvt->service_request)) { - mutex_lock(&gvt->lock); + (void *)&gvt->service_request)) intel_gvt_emulate_vblank(gvt); - mutex_unlock(&gvt->lock); - } if (test_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request) || @@ -321,6 +316,11 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) if (WARN_ON(!gvt)) return; + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_cleanup_vgpu_type_groups(gvt); + intel_gvt_clean_vgpu_types(gvt); + intel_gvt_debugfs_clean(gvt); clean_service_thread(gvt); intel_gvt_clean_cmd_parser(gvt); @@ -328,17 +328,10 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_workload_scheduler(gvt); intel_gvt_clean_gtt(gvt); intel_gvt_clean_irq(gvt); - intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); - - intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); - intel_gvt_cleanup_vgpu_type_groups(gvt); - intel_gvt_clean_vgpu_types(gvt); - + intel_gvt_clean_mmio_info(gvt); idr_destroy(&gvt->vgpu_idr); - intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); - kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -379,6 +372,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) idr_init(&gvt->vgpu_idr); spin_lock_init(&gvt->scheduler.mmio_context_lock); mutex_init(&gvt->lock); + mutex_init(&gvt->sched_lock); gvt->dev_priv = dev_priv; init_device_info(gvt); @@ -473,3 +467,7 @@ out_clean_idr: kfree(gvt); return ret; } + +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) +MODULE_SOFTDEP("pre: kvmgt"); +#endif diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 858967daf04b..31f6cdbe5c42 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -170,12 +170,18 @@ struct intel_vgpu_submission { struct intel_vgpu { struct intel_gvt *gvt; + struct mutex vgpu_lock; int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; bool pv_notified; bool failsafe; unsigned int resetting_eng; + + /* Both sched_data and sched_ctl can be seen a part of the global gvt + * scheduler structure. So below 2 vgpu data are protected + * by sched_lock, not vgpu_lock. + */ void *sched_data; struct vgpu_sched_ctl sched_ctl; @@ -296,7 +302,13 @@ struct intel_vgpu_type { }; struct intel_gvt { + /* GVT scope lock, protect GVT itself, and all resource currently + * not yet protected by special locks(vgpu and scheduler lock). + */ struct mutex lock; + /* scheduler scope lock, protect gvt and vgpu schedule related data */ + struct mutex sched_lock; + struct drm_i915_private *dev_priv; struct idr vgpu_idr; /* vGPU IDR pool */ @@ -316,6 +328,10 @@ struct intel_gvt { struct task_struct *service_thread; wait_queue_head_t service_thread_wq; + + /* service_request is always used in bit operation, we should always + * use it with atomic bit ops so that no need to use gvt big lock. 
+ */ unsigned long service_request; struct { @@ -363,9 +379,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) #define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.gmadr.start) -#define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.base.total) +#define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.vm.total) #define gvt_ggtt_sz(gvt) \ - ((gvt->dev_priv->ggtt.base.total >> PAGE_SHIFT) << 3) + ((gvt->dev_priv->ggtt.vm.total >> PAGE_SHIFT) << 3) #define gvt_hidden_sz(gvt) (gvt_ggtt_gm_sz(gvt) - gvt_aperture_sz(gvt)) #define gvt_aperture_gmadr_base(gvt) (0) @@ -470,6 +486,7 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_release_vgpu(struct intel_vgpu *vgpu); void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, unsigned int engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); @@ -547,7 +564,8 @@ struct intel_gvt_ops { unsigned int); struct intel_vgpu *(*vgpu_create)(struct intel_gvt *, struct intel_vgpu_type *); - void (*vgpu_destroy)(struct intel_vgpu *); + void (*vgpu_destroy)(struct intel_vgpu *vgpu); + void (*vgpu_release)(struct intel_vgpu *vgpu); void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8f1caacdc78a..7a58ca555197 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -55,6 +55,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_SKL; else if (IS_KABYLAKE(gvt->dev_priv)) return D_KBL; + else if (IS_BROXTON(gvt->dev_priv)) + return D_BXT; return 0; } @@ -208,6 +210,31 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, return 0; } +static int gamw_echo_dev_rw_ia_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 ips = (*(u32 *)p_data) & GAMW_ECO_ENABLE_64K_IPS_FIELD; + + if (INTEL_GEN(vgpu->gvt->dev_priv) <= 10) { + if (ips == GAMW_ECO_ENABLE_64K_IPS_FIELD) + gvt_dbg_core("vgpu%d: ips enabled\n", vgpu->id); + else if (!ips) + gvt_dbg_core("vgpu%d: ips disabled\n", vgpu->id); + else { + /* All engines must be enabled together for vGPU, + * since we don't know which engine the ppgtt will + * bind to when shadowing. 
+ */ + gvt_vgpu_err("Unsupported IPS setting %x, cannot enable 64K gtt.\n", + ips); + return -EINVAL; + } + } + + write_vreg(vgpu, offset, p_data, bytes); + return 0; +} + static int fence_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { @@ -255,7 +282,8 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_BROXTON(vgpu->gvt->dev_priv)) { switch (offset) { case FORCEWAKE_RENDER_GEN9_REG: ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; @@ -316,6 +344,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } } + /* vgpu_lock already hold by emulate mmio r/w */ intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); /* sw will wait for the device to ack the reset request */ @@ -420,7 +449,10 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, offset) |= I965_PIPECONF_ACTIVE; else vgpu_vreg(vgpu, offset) &= ~I965_PIPECONF_ACTIVE; + /* vgpu_lock already hold by emulate mmio r/w */ + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_check_vblank_emulation(vgpu->gvt); + mutex_lock(&vgpu->vgpu_lock); return 0; } @@ -857,7 +889,8 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, data = vgpu_vreg(vgpu, offset); if ((IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) + || IS_KABYLAKE(vgpu->gvt->dev_priv) + || IS_BROXTON(vgpu->gvt->dev_priv)) && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { /* SKL DPB/C/D aux ctl register changed */ return 0; @@ -1209,8 +1242,8 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, ret = handle_g2v_notification(vgpu, data); break; /* add xhot and yhot to handled list to avoid error log */ - case 0x78830: - case 0x78834: + case _vgtif_reg(cursor_x_hot): + case _vgtif_reg(cursor_y_hot): case _vgtif_reg(pdp[0].lo): case _vgtif_reg(pdp[0].hi): case _vgtif_reg(pdp[1].lo): @@ -1369,6 +1402,16 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, *data0 = 0x1e1a1100; else *data0 = 0x61514b3d; + } else if (IS_BROXTON(vgpu->gvt->dev_priv)) { + /** + * "Read memory latency" command on gen9. + * Below memory latency values are read + * from Broxton MRB. 
+ */ + if (!*data0) + *data0 = 0x16080707; + else + *data0 = 0x16161616; } break; case SKL_PCODE_CDCLK_CONTROL: @@ -1426,8 +1469,11 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, { u32 v = *(u32 *)p_data; - v &= (1 << 31) | (1 << 29) | (1 << 9) | - (1 << 7) | (1 << 5) | (1 << 3) | (1 << 1); + if (IS_BROXTON(vgpu->gvt->dev_priv)) + v &= (1 << 31) | (1 << 29); + else + v &= (1 << 31) | (1 << 29) | (1 << 9) | + (1 << 7) | (1 << 5) | (1 << 3) | (1 << 1); v |= (v >> 1); return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); @@ -1447,6 +1493,109 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } +static int bxt_de_pll_enable_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = *(u32 *)p_data; + + if (v & BXT_DE_PLL_PLL_ENABLE) + v |= BXT_DE_PLL_LOCK; + + vgpu_vreg(vgpu, offset) = v; + + return 0; +} + +static int bxt_port_pll_enable_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = *(u32 *)p_data; + + if (v & PORT_PLL_ENABLE) + v |= PORT_PLL_LOCK; + + vgpu_vreg(vgpu, offset) = v; + + return 0; +} + +static int bxt_phy_ctl_family_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = *(u32 *)p_data; + u32 data = v & COMMON_RESET_DIS ? BXT_PHY_LANE_ENABLED : 0; + + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data; + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data; + vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data; + + vgpu_vreg(vgpu, offset) = v; + + return 0; +} + +static int bxt_port_tx_dw3_read(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = vgpu_vreg(vgpu, offset); + + v &= ~UNIQUE_TRANGE_EN_METHOD; + + vgpu_vreg(vgpu, offset) = v; + + return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); +} + +static int bxt_pcs_dw12_grp_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = *(u32 *)p_data; + + if (offset == _PORT_PCS_DW12_GRP_A || offset == _PORT_PCS_DW12_GRP_B) { + vgpu_vreg(vgpu, offset - 0x600) = v; + vgpu_vreg(vgpu, offset - 0x800) = v; + } else { + vgpu_vreg(vgpu, offset - 0x400) = v; + vgpu_vreg(vgpu, offset - 0x600) = v; + } + + vgpu_vreg(vgpu, offset) = v; + + return 0; +} + +static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 v = *(u32 *)p_data; + + if (v & BIT(0)) { + vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &= + ~PHY_RESERVED; + vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |= + PHY_POWER_GOOD; + } + + if (v & BIT(1)) { + vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &= + ~PHY_RESERVED; + vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) |= + PHY_POWER_GOOD; + } + + + vgpu_vreg(vgpu, offset) = v; + + return 0; +} + +static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + vgpu_vreg(vgpu, offset) = 0; + return 0; +} + static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1657,7 +1806,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL, + gamw_echo_dev_rw_ia_write); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, 
NULL, NULL); @@ -2670,17 +2821,17 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x45504), D_SKL_PLUS); MMIO_D(_MMIO(0x45520), D_SKL_PLUS); MMIO_D(_MMIO(0x46000), D_SKL_PLUS); - MMIO_DH(_MMIO(0x46010), D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_DH(_MMIO(0x46014), D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_D(_MMIO(0x6C040), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6C048), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6C050), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6C044), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6C04C), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6C054), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6c058), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6c05c), D_SKL | D_KBL); - MMIO_DH(_MMIO(0x6c060), D_SKL | D_KBL, dpll_status_read, NULL); + MMIO_DH(_MMIO(0x46010), D_SKL_PLUS, NULL, skl_lcpll_write); + MMIO_DH(_MMIO(0x46014), D_SKL_PLUS, NULL, skl_lcpll_write); + MMIO_D(_MMIO(0x6C040), D_SKL_PLUS); + MMIO_D(_MMIO(0x6C048), D_SKL_PLUS); + MMIO_D(_MMIO(0x6C050), D_SKL_PLUS); + MMIO_D(_MMIO(0x6C044), D_SKL_PLUS); + MMIO_D(_MMIO(0x6C04C), D_SKL_PLUS); + MMIO_D(_MMIO(0x6C054), D_SKL_PLUS); + MMIO_D(_MMIO(0x6c058), D_SKL_PLUS); + MMIO_D(_MMIO(0x6c05c), D_SKL_PLUS); + MMIO_DH(_MMIO(0x6c060), D_SKL_PLUS, dpll_status_read, NULL); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); @@ -2805,53 +2956,57 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x7239c), D_SKL_PLUS); MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); - MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); - MMIO_D(_MMIO(0x8f004), D_SKL | D_KBL); - MMIO_D(_MMIO(0x8f034), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f074), D_SKL_PLUS); + MMIO_D(_MMIO(0x8f004), D_SKL_PLUS); + MMIO_D(_MMIO(0x8f034), D_SKL_PLUS); - MMIO_D(_MMIO(0xb11c), D_SKL | D_KBL); + MMIO_D(_MMIO(0xb11c), D_SKL_PLUS); - MMIO_D(_MMIO(0x51000), D_SKL | D_KBL); + MMIO_D(_MMIO(0x51000), D_SKL_PLUS); MMIO_D(_MMIO(0x6c00c), D_SKL_PLUS); - MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL_PLUS, + NULL, NULL); + MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL_PLUS, + NULL, NULL); MMIO_D(RPM_CONFIG0, D_SKL_PLUS); MMIO_D(_MMIO(0xd08), D_SKL_PLUS); MMIO_D(RC6_LOCATION, D_SKL_PLUS); MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); /* TRTT */ - MMIO_DFH(_MMIO(0x4de0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(_MMIO(0x4de4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(_MMIO(0x4de8), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(_MMIO(0x4dec), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(_MMIO(0x4df0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(_MMIO(0x4df4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(_MMIO(0x4dfc), D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(_MMIO(0x4de0), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de4), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de8), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4dec), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df0), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df4), D_SKL_PLUS, F_CMD_ACCESS, + NULL, gen9_trtte_write); + MMIO_DH(_MMIO(0x4dfc), D_SKL_PLUS, NULL, gen9_trtt_chicken_write); - MMIO_D(_MMIO(0x45008), 
D_SKL | D_KBL); + MMIO_D(_MMIO(0x45008), D_SKL_PLUS); - MMIO_D(_MMIO(0x46430), D_SKL | D_KBL); + MMIO_D(_MMIO(0x46430), D_SKL_PLUS); - MMIO_D(_MMIO(0x46520), D_SKL | D_KBL); + MMIO_D(_MMIO(0x46520), D_SKL_PLUS); - MMIO_D(_MMIO(0xc403c), D_SKL | D_KBL); + MMIO_D(_MMIO(0xc403c), D_SKL_PLUS); MMIO_D(_MMIO(0xb004), D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); MMIO_D(_MMIO(0x65900), D_SKL_PLUS); - MMIO_D(_MMIO(0x1082c0), D_SKL | D_KBL); - MMIO_D(_MMIO(0x4068), D_SKL | D_KBL); - MMIO_D(_MMIO(0x67054), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6e560), D_SKL | D_KBL); - MMIO_D(_MMIO(0x6e554), D_SKL | D_KBL); - MMIO_D(_MMIO(0x2b20), D_SKL | D_KBL); - MMIO_D(_MMIO(0x65f00), D_SKL | D_KBL); - MMIO_D(_MMIO(0x65f08), D_SKL | D_KBL); - MMIO_D(_MMIO(0x320f0), D_SKL | D_KBL); + MMIO_D(_MMIO(0x1082c0), D_SKL_PLUS); + MMIO_D(_MMIO(0x4068), D_SKL_PLUS); + MMIO_D(_MMIO(0x67054), D_SKL_PLUS); + MMIO_D(_MMIO(0x6e560), D_SKL_PLUS); + MMIO_D(_MMIO(0x6e554), D_SKL_PLUS); + MMIO_D(_MMIO(0x2b20), D_SKL_PLUS); + MMIO_D(_MMIO(0x65f00), D_SKL_PLUS); + MMIO_D(_MMIO(0x65f08), D_SKL_PLUS); + MMIO_D(_MMIO(0x320f0), D_SKL_PLUS); MMIO_D(_MMIO(0x70034), D_SKL_PLUS); MMIO_D(_MMIO(0x71034), D_SKL_PLUS); @@ -2869,11 +3024,188 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_MMIO(0x44500), D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_D(_MMIO(0x4ab8), D_KBL); - MMIO_D(_MMIO(0x2248), D_SKL_PLUS | D_KBL); + MMIO_D(_MMIO(0x2248), D_KBL | D_SKL); + + return 0; +} + +static int init_bxt_mmio_info(struct intel_gvt *gvt) +{ + struct drm_i915_private *dev_priv = gvt->dev_priv; + int ret; + + MMIO_F(_MMIO(0x80000), 0x3000, 0, 0, 0, D_BXT, NULL, NULL); + + MMIO_D(GEN7_SAMPLER_INSTDONE, D_BXT); + MMIO_D(GEN7_ROW_INSTDONE, D_BXT); + MMIO_D(GEN8_FAULT_TLB_DATA0, D_BXT); + MMIO_D(GEN8_FAULT_TLB_DATA1, D_BXT); + MMIO_D(ERROR_GEN6, D_BXT); + MMIO_D(DONE_REG, D_BXT); + MMIO_D(EIR, D_BXT); + MMIO_D(PGTBL_ER, D_BXT); + MMIO_D(_MMIO(0x4194), D_BXT); + MMIO_D(_MMIO(0x4294), D_BXT); + MMIO_D(_MMIO(0x4494), D_BXT); + + MMIO_RING_D(RING_PSMI_CTL, D_BXT); + MMIO_RING_D(RING_DMA_FADD, D_BXT); + MMIO_RING_D(RING_DMA_FADD_UDW, D_BXT); + MMIO_RING_D(RING_IPEHR, D_BXT); + MMIO_RING_D(RING_INSTPS, D_BXT); + MMIO_RING_D(RING_BBADDR_UDW, D_BXT); + MMIO_RING_D(RING_BBSTATE, D_BXT); + MMIO_RING_D(RING_IPEIR, D_BXT); + + MMIO_F(SOFT_SCRATCH(0), 16 * 4, 0, 0, 0, D_BXT, NULL, NULL); + + MMIO_DH(BXT_P_CR_GT_DISP_PWRON, D_BXT, NULL, bxt_gt_disp_pwron_write); + MMIO_D(BXT_RP_STATE_CAP, D_BXT); + MMIO_DH(BXT_PHY_CTL_FAMILY(DPIO_PHY0), D_BXT, + NULL, bxt_phy_ctl_family_write); + MMIO_DH(BXT_PHY_CTL_FAMILY(DPIO_PHY1), D_BXT, + NULL, bxt_phy_ctl_family_write); + MMIO_D(BXT_PHY_CTL(PORT_A), D_BXT); + MMIO_D(BXT_PHY_CTL(PORT_B), D_BXT); + MMIO_D(BXT_PHY_CTL(PORT_C), D_BXT); + MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_A), D_BXT, + NULL, bxt_port_pll_enable_write); + MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_B), D_BXT, + NULL, bxt_port_pll_enable_write); + MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_C), D_BXT, NULL, + bxt_port_pll_enable_write); + + MMIO_D(BXT_PORT_CL1CM_DW0(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW9(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW10(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW28(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW30(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_CL2CM_DW6(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_REF_DW3(DPIO_PHY0), D_BXT); + 
MMIO_D(BXT_PORT_REF_DW6(DPIO_PHY0), D_BXT); + MMIO_D(BXT_PORT_REF_DW8(DPIO_PHY0), D_BXT); + + MMIO_D(BXT_PORT_CL1CM_DW0(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW9(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW10(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW28(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_CL1CM_DW30(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_CL2CM_DW6(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_REF_DW3(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_REF_DW6(DPIO_PHY1), D_BXT); + MMIO_D(BXT_PORT_REF_DW8(DPIO_PHY1), D_BXT); + + MMIO_D(BXT_PORT_PLL_EBB_0(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PLL_EBB_4(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_LN01(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_GRP(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN01(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN23(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY0, DPIO_CH0), D_BXT, + NULL, bxt_pcs_dw12_grp_write); + MMIO_D(BXT_PORT_TX_DW2_LN0(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW2_GRP(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY0, DPIO_CH0), D_BXT, + bxt_port_tx_dw3_read, NULL); + MMIO_D(BXT_PORT_TX_DW3_GRP(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_LN0(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_GRP(DPIO_PHY0, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH0, 0), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH0, 1), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH0, 2), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH0, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 0), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 1), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 2), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 6), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 8), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 9), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH0, 10), D_BXT); + + MMIO_D(BXT_PORT_PLL_EBB_0(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_PLL_EBB_4(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_LN01(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_GRP(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN01(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN23(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY0, DPIO_CH1), D_BXT, + NULL, bxt_pcs_dw12_grp_write); + MMIO_D(BXT_PORT_TX_DW2_LN0(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_TX_DW2_GRP(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY0, DPIO_CH1), D_BXT, + bxt_port_tx_dw3_read, NULL); + MMIO_D(BXT_PORT_TX_DW3_GRP(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_LN0(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_GRP(DPIO_PHY0, DPIO_CH1), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH1, 0), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH1, 1), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH1, 2), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY0, DPIO_CH1, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 0), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 1), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 2), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 6), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 8), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 9), D_BXT); + 
MMIO_D(BXT_PORT_PLL(DPIO_PHY0, DPIO_CH1, 10), D_BXT); + + MMIO_D(BXT_PORT_PLL_EBB_0(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PLL_EBB_4(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_LN01(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW10_GRP(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN01(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_PCS_DW12_LN23(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY1, DPIO_CH0), D_BXT, + NULL, bxt_pcs_dw12_grp_write); + MMIO_D(BXT_PORT_TX_DW2_LN0(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW2_GRP(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY1, DPIO_CH0), D_BXT, + bxt_port_tx_dw3_read, NULL); + MMIO_D(BXT_PORT_TX_DW3_GRP(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_LN0(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW4_GRP(DPIO_PHY1, DPIO_CH0), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY1, DPIO_CH0, 0), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY1, DPIO_CH0, 1), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY1, DPIO_CH0, 2), D_BXT); + MMIO_D(BXT_PORT_TX_DW14_LN(DPIO_PHY1, DPIO_CH0, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 0), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 1), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 2), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 3), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 6), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 8), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 9), D_BXT); + MMIO_D(BXT_PORT_PLL(DPIO_PHY1, DPIO_CH0, 10), D_BXT); + + MMIO_D(BXT_DE_PLL_CTL, D_BXT); + MMIO_DH(BXT_DE_PLL_ENABLE, D_BXT, NULL, bxt_de_pll_enable_write); + MMIO_D(BXT_DSI_PLL_CTL, D_BXT); + MMIO_D(BXT_DSI_PLL_ENABLE, D_BXT); + + MMIO_D(GEN9_CLKGATE_DIS_0, D_BXT); + + MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_A), D_BXT); + MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT); + MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT); + + MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); + MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); + + MMIO_D(RC6_CTX_BASE, D_BXT); + + MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT); + MMIO_D(GEN8_PUSHBUS_ENABLE, D_BXT); + MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT); + MMIO_D(GEN6_GFXPAUSE, D_BXT); + MMIO_D(GEN8_L3SQCREG1, D_BXT); + + MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2965,6 +3297,16 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) ret = init_skl_mmio_info(gvt); if (ret) goto err; + } else if (IS_BROXTON(dev_priv)) { + ret = init_broadwell_mmio_info(gvt); + if (ret) + goto err; + ret = init_skl_mmio_info(gvt); + if (ret) + goto err; + ret = init_bxt_mmio_info(gvt); + if (ret) + goto err; } gvt->mmio.mmio_block = mmio_blocks; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f6dd9f717888..5af11cf1b482 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -53,7 +53,7 @@ struct intel_gvt_mpt { unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr); + unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 7a041b368f68..5daa23ae566b 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ 
b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -350,7 +350,8 @@ static void update_upstream_irq(struct intel_vgpu *vgpu, clear_bits |= (1 << bit); } - WARN_ON(!up_irq_info); + if (WARN_ON(!up_irq_info)) + return; if (up_irq_info->group == INTEL_GVT_IRQ_INFO_MASTER) { u32 isr = i915_mmio_reg_offset(up_irq_info->reg_base); @@ -580,7 +581,9 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); - } else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { + } else if (IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv) + || IS_BROXTON(gvt->dev_priv)) { SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); @@ -690,14 +693,8 @@ int intel_gvt_init_irq(struct intel_gvt *gvt) gvt_dbg_core("init irq framework\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv)) { - irq->ops = &gen8_irq_ops; - irq->irq_map = gen8_irq_map; - } else { - WARN_ON(1); - return -ENODEV; - } + irq->ops = &gen8_irq_ops; + irq->irq_map = gen8_irq_map; /* common event initialization */ init_events(irq); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index df4e4a07db3d..a45f46d8537f 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -43,6 +43,8 @@ #include <linux/mdev.h> #include <linux/debugfs.h> +#include <linux/nospec.h> + #include "i915_drv.h" #include "gvt.h" @@ -94,6 +96,7 @@ struct gvt_dma { struct rb_node dma_addr_node; gfn_t gfn; dma_addr_t dma_addr; + unsigned long size; struct kref ref; }; @@ -106,36 +109,90 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); -static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t *dma_addr) +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size) { - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - struct page *page; - unsigned long pfn; + int total_pages; + int npage; int ret; - /* Pin the page first. */ - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); - if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, ret); - return -EINVAL; + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); + WARN_ON(ret != 1); } +} - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - return -EINVAL; +/* Pin a normal or compound guest page for dma. */ +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size, struct page **page) +{ + unsigned long base_pfn = 0; + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + /* + * We pin the pages one-by-one to avoid allocating a big arrary + * on stack to hold pfns. 
+ */ + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + unsigned long pfn; + + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", + cur_gfn, ret); + goto err; + } + + if (!pfn_valid(pfn)) { + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); + npage++; + ret = -EFAULT; + goto err; + } + + if (npage == 0) + base_pfn = pfn; + else if (base_pfn + npage != pfn) { + gvt_vgpu_err("The pages are not continuous\n"); + ret = -EINVAL; + npage++; + goto err; + } } + *page = pfn_to_page(base_pfn); + return 0; +err: + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); + return ret; +} + +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, + dma_addr_t *dma_addr, unsigned long size) +{ + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + struct page *page = NULL; + int ret; + + ret = gvt_pin_guest_page(vgpu, gfn, size, &page); + if (ret) + return ret; + /* Setup DMA mapping. */ - page = pfn_to_page(pfn); - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, *dma_addr)) { - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); + gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", + page_to_pfn(page), ret); + gvt_unpin_guest_page(vgpu, gfn, size); return -ENOMEM; } @@ -143,14 +200,12 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, } static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - int ret; - dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - WARN_ON(ret != 1); + dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + gvt_unpin_guest_page(vgpu, gfn, size); } static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, @@ -191,7 +246,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) } static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; @@ -203,6 +258,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new->vgpu = vgpu; new->gfn = gfn; new->dma_addr = dma_addr; + new->size = size; kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. 
*/ @@ -260,7 +316,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) break; } dma = rb_entry(node, struct gvt_dma, gfn_node); - gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); } @@ -515,7 +571,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (!entry) continue; - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(vgpu, entry); } mutex_unlock(&vgpu->vdev.cache_lock); @@ -611,7 +668,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) return; - intel_gvt_ops->vgpu_deactivate(vgpu); + intel_gvt_ops->vgpu_release(vgpu); ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); @@ -1084,7 +1141,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - int i, ret; + unsigned int i; + int ret; struct vfio_region_info_cap_sparse_mmap *sparse = NULL; size_t size; int nr_areas = 1; @@ -1169,6 +1227,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (info.index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) return -EINVAL; + info.index = + array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; @@ -1195,11 +1257,13 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, &sparse->header, sizeof(*sparse) + (sparse->nr_areas * sizeof(*sparse->areas))); - kfree(sparse); - if (ret) + if (ret) { + kfree(sparse); return ret; + } break; default: + kfree(sparse); return -EINVAL; } } @@ -1215,6 +1279,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, sizeof(info), caps.buf, caps.size)) { kfree(caps.buf); + kfree(sparse); return -EFAULT; } info.cap_offset = sizeof(info); @@ -1223,6 +1288,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, kfree(caps.buf); } + kfree(sparse); return copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { @@ -1560,7 +1626,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvmgt_protect_table_init(info); gvt_cache_init(vgpu); - mutex_init(&vgpu->dmabuf_lock); init_completion(&vgpu->vblank_done); info->track_node.track_write = kvmgt_page_track_write; @@ -1648,7 +1713,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) } int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr) + unsigned long size, dma_addr_t *dma_addr) { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; @@ -1665,11 +1730,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { - ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else { @@ -1681,7 +1746,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, return 0; err_unmap: - gvt_dma_unmap_page(vgpu, gfn, *dma_addr); + gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: mutex_unlock(&info->vgpu->vdev.cache_lock); return ret; @@ -1691,7 +1756,8 @@ static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); - gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(entry->vgpu, entry); } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index b31eb36fc102..994366035364 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -67,7 +67,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, return; gvt = vgpu->gvt; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); if (reg_is_mmio(gvt, offset)) { if (read) @@ -85,7 +85,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, memcpy(pt, p_data, bytes); } - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -109,7 +109,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); return 0; } - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -156,7 +156,7 @@ err: gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", offset, bytes); out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -182,7 +182,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, return 0; } - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -220,7 +220,7 @@ err: gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, bytes); out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index dac8c6401e26..1ffc69eba30e 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -42,15 +42,16 @@ struct intel_vgpu; #define D_BDW (1 << 0) #define D_SKL (1 << 1) #define D_KBL (1 << 2) +#define D_BXT (1 << 3) -#define D_GEN9PLUS (D_SKL | D_KBL) -#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) +#define 
D_GEN9PLUS (D_SKL | D_KBL | D_BXT) +#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL | D_BXT) -#define D_SKL_PLUS (D_SKL | D_KBL) -#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) +#define D_SKL_PLUS (D_SKL | D_KBL | D_BXT) +#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL | D_BXT) #define D_PRE_SKL (D_BDW) -#define D_ALL (D_BDW | D_SKL | D_KBL) +#define D_ALL (D_BDW | D_SKL | D_KBL | D_BXT) typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *, unsigned int); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 5ca9caf7552a..42e1e6bdcc2c 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -364,7 +364,8 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv))) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -401,7 +402,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (IS_KABYLAKE(dev_priv) && ring_id == RCS) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) && ring_id == RCS) return; if (!pre && !gen9_render_mocs.initialized) @@ -446,9 +447,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, #define CTX_CONTEXT_CONTROL_VAL 0x03 -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) +bool is_inhibit_context(struct intel_context *ce) { - u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; + const u32 *reg_state = ce->lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -467,7 +468,9 @@ static void switch_mmio(struct intel_vgpu *pre, u32 old_v, new_v; dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv) + || IS_BROXTON(dev_priv)) switch_mocs(pre, next, ring_id); for (mmio = dev_priv->gvt->engine_mmio_list.mmio; @@ -479,7 +482,8 @@ static void switch_mmio(struct intel_vgpu *pre, * state image on kabylake, it's initialized by lri command and * save or restore with context together. */ - if (IS_KABYLAKE(dev_priv) && mmio->in_context) + if ((IS_KABYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + && mmio->in_context) continue; // save @@ -501,7 +505,7 @@ static void switch_mmio(struct intel_vgpu *pre, * itself. 
*/ if (mmio->in_context && - !is_inhibit_context(s->shadow_ctx, ring_id)) + !is_inhibit_context(&s->shadow_ctx->__engine[ring_id])) continue; if (mmio->mask) @@ -574,7 +578,9 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) { struct engine_mmio *mmio; - if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + if (IS_SKYLAKE(gvt->dev_priv) || + IS_KABYLAKE(gvt->dev_priv) || + IS_BROXTON(gvt->dev_priv)) gvt->engine_mmio_list.mmio = gen9_engine_mmio_list; else gvt->engine_mmio_list.mmio = gen8_engine_mmio_list; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 0439eb8057a8..5c3b9ff9f96a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -49,7 +49,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); +bool is_inhibit_context(struct intel_context *ce); int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, struct i915_request *req); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 32ffcd566cdd..67f19992b226 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( /** * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page * @vgpu: a vGPU - * @gpfn: guest pfn + * @gfn: guest pfn + * @size: page size * @dma_addr: retrieve allocated dma addr * * Returns: * 0 on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_dma_map_guest_page( - struct intel_vgpu *vgpu, unsigned long gfn, + struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size, dma_addr); } diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c index 53e2bd79c97d..256d0db8bbb1 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.c +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -157,11 +157,10 @@ int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, void *data, unsigned int bytes) { - struct intel_gvt *gvt = vgpu->gvt; struct intel_vgpu_page_track *page_track; int ret = 0; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); if (!page_track) { @@ -179,6 +178,6 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, } out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index d053cbe1dc94..09d7bb72b4ff 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -228,7 +228,7 @@ void intel_gvt_schedule(struct intel_gvt *gvt) struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; ktime_t cur_time; - mutex_lock(&gvt->lock); + mutex_lock(&gvt->sched_lock); cur_time = ktime_get(); if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, @@ -244,7 +244,7 @@ void intel_gvt_schedule(struct intel_gvt *gvt) vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); tbs_sched_func(sched_data); - mutex_unlock(&gvt->lock); + mutex_unlock(&gvt->sched_lock); } static enum 
hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data) @@ -359,39 +359,65 @@ static struct intel_gvt_sched_policy_ops tbs_schedule_ops = { int intel_gvt_init_sched_policy(struct intel_gvt *gvt) { + int ret; + + mutex_lock(&gvt->sched_lock); gvt->scheduler.sched_ops = &tbs_schedule_ops; + ret = gvt->scheduler.sched_ops->init(gvt); + mutex_unlock(&gvt->sched_lock); - return gvt->scheduler.sched_ops->init(gvt); + return ret; } void intel_gvt_clean_sched_policy(struct intel_gvt *gvt) { + mutex_lock(&gvt->sched_lock); gvt->scheduler.sched_ops->clean(gvt); + mutex_unlock(&gvt->sched_lock); } +/* for per-vgpu scheduler policy, there are 2 per-vgpu data: + * sched_data, and sched_ctl. We see these 2 data as part of + * the global scheduler which are proteced by gvt->sched_lock. + * Caller should make their decision if the vgpu_lock should + * be hold outside. + */ + int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu) { - return vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu); + int ret; + + mutex_lock(&vgpu->gvt->sched_lock); + ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu); + mutex_unlock(&vgpu->gvt->sched_lock); + + return ret; } void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu) { + mutex_lock(&vgpu->gvt->sched_lock); vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu); + mutex_unlock(&vgpu->gvt->sched_lock); } void intel_vgpu_start_schedule(struct intel_vgpu *vgpu) { struct vgpu_sched_data *vgpu_data = vgpu->sched_data; + mutex_lock(&vgpu->gvt->sched_lock); if (!vgpu_data->active) { gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id); vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu); } + mutex_unlock(&vgpu->gvt->sched_lock); } void intel_gvt_kick_schedule(struct intel_gvt *gvt) { + mutex_lock(&gvt->sched_lock); intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); + mutex_unlock(&gvt->sched_lock); } void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) @@ -406,6 +432,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id); + mutex_lock(&vgpu->gvt->sched_lock); scheduler->sched_ops->stop_schedule(vgpu); if (scheduler->next_vgpu == vgpu) @@ -425,4 +452,5 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) } } spin_unlock_bh(&scheduler->mmio_context_lock); + mutex_unlock(&vgpu->gvt->sched_lock); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c2d183b91500..43aa058e29fc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -45,20 +45,16 @@ static void set_context_pdp_root_pointer( struct execlist_ring_context *ring_context, u32 pdp[8]) { - struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW; int i; for (i = 0; i < 8; i++) - pdp_pair[i].val = pdp[7 - i]; + ring_context->pdps[i].val = pdp[7 - i]; } static void update_shadow_pdps(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -128,9 +124,8 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - 
shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -205,7 +200,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) static inline bool is_gvt_request(struct i915_request *req) { - return i915_gem_context_force_single_submission(req->ctx); + return i915_gem_context_force_single_submission(req->gem_context); } static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) @@ -280,10 +275,8 @@ static int shadow_context_status_change(struct notifier_block *nb, return NOTIFY_OK; } -static void shadow_context_descriptor_update(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void shadow_context_descriptor_update(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -292,7 +285,7 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, * like GEN8_CTX_* cached in desc_template */ desc &= U64_MAX << 12; - desc |= ctx->desc_template & ((1ULL << 12) - 1); + desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); ce->lrc_desc = desc; } @@ -300,12 +293,12 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct i915_request *req = workload->req; void *shadow_ring_buffer_va; u32 *cs; - struct i915_request *req = workload->req; - if (IS_KABYLAKE(req->i915) && - is_inhibit_context(req->ctx, req->engine->id)) + if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)) + && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ @@ -353,92 +346,67 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct intel_ring *ring; + struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; + struct intel_context *ce; + struct i915_request *rq; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->shadowed) + if (workload->req) return 0; + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. 
+ */ + ce = intel_context_pin(shadow_ctx, engine); + if (IS_ERR(ce)) { + gvt_vgpu_err("fail to pin shadow context\n"); + return PTR_ERR(ce); + } + shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) - shadow_context_descriptor_update(shadow_ctx, - dev_priv->engine[ring_id]); + if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated)) + shadow_context_descriptor_update(ce); ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto err_scan; + goto err_unpin; if ((workload->ring_id == RCS) && (workload->wa_ctx.indirect_ctx.size != 0)) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) - goto err_scan; + goto err_shadow; } - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. - */ - ring = intel_context_pin(shadow_ctx, engine); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - gvt_vgpu_err("fail to pin shadow context\n"); + rq = i915_request_alloc(engine, shadow_ctx); + if (IS_ERR(rq)) { + gvt_vgpu_err("fail to allocate gem request\n"); + ret = PTR_ERR(rq); goto err_shadow; } + workload->req = i915_request_get(rq); ret = populate_shadow_context(workload); if (ret) - goto err_unpin; - workload->shadowed = true; - return 0; + goto err_req; -err_unpin: - intel_context_unpin(shadow_ctx, engine); + return 0; +err_req: + rq = fetch_and_zero(&workload->req); + i915_request_put(rq); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: - return ret; -} - -static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) -{ - int ring_id = workload->ring_id; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct i915_request *rq; - struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - int ret; - - rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx); - if (IS_ERR(rq)) { - gvt_vgpu_err("fail to allocate gem request\n"); - ret = PTR_ERR(rq); - goto err_unpin; - } - - gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); - - workload->req = i915_request_get(rq); - ret = copy_workload_to_ring_buffer(workload); - if (ret) - goto err_unpin; - return 0; - err_unpin: - intel_context_unpin(shadow_ctx, engine); - release_shadow_wa_ctx(&workload->wa_ctx); + intel_context_unpin(ce); return ret; } @@ -508,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) i915_gem_obj_finish_shmem_access(bb->obj); bb->accessing = false; - i915_vma_move_to_active(bb->vma, workload->req, 0); + ret = i915_vma_move_to_active(bb->vma, + workload->req, + 0); + if (ret) + goto err; } } return 0; @@ -517,21 +489,13 @@ err: return ret; } -static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - struct intel_vgpu_workload *workload = container_of(wa_ctx, - struct intel_vgpu_workload, - wa_ctx); - int ring_id = 
workload->ring_id; - struct intel_vgpu_submission *s = &workload->vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; - struct execlist_ring_context *shadow_ring_context; - struct page *page; - - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + struct intel_vgpu_workload *workload = + container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); + struct i915_request *rq = workload->req; + struct execlist_ring_context *shadow_ring_context = + (struct execlist_ring_context *)rq->hw_context->lrc_reg_state; shadow_ring_context->bb_per_ctx_ptr.val = (shadow_ring_context->bb_per_ctx_ptr.val & @@ -539,9 +503,6 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) shadow_ring_context->rcs_indirect_ctx.val = (shadow_ring_context->rcs_indirect_ctx.val & (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; - - kunmap_atomic(shadow_ring_context); - return 0; } static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -633,7 +594,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload) goto err_unpin_mm; } - ret = intel_gvt_generate_request(workload); + ret = copy_workload_to_ring_buffer(workload); if (ret) { gvt_vgpu_err("fail to generate request\n"); goto err_unpin_mm; @@ -670,16 +631,14 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - int ret = 0; + int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", ring_id, workload); + mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); @@ -687,10 +646,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) goto out; ret = prepare_workload(workload); - if (ret) { - intel_context_unpin(shadow_ctx, engine); - goto out; - } out: if (ret) @@ -704,6 +659,7 @@ out: } mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -713,7 +669,7 @@ static struct intel_vgpu_workload *pick_next_workload( struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; - mutex_lock(&gvt->lock); + mutex_lock(&gvt->sched_lock); /* * no current vgpu / will be scheduled out / no workload @@ -759,33 +715,29 @@ static struct intel_vgpu_workload *pick_next_workload( atomic_inc(&workload->vgpu->submission.running_workload_num); out: - mutex_unlock(&gvt->lock); + mutex_unlock(&gvt->sched_lock); return workload; } static void update_guest_context(struct intel_vgpu_workload *workload) { + struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - int ring_id = workload->ring_id; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; unsigned long context_gpa, context_page_num; int i; - gvt_dbg_sched("ring id %d 
workload lrca %x\n", ring_id, - workload->ctx_desc.lrca); - - context_page_num = gvt->dev_priv->engine[ring_id]->context_size; + gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, + workload->ctx_desc.lrca); + context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) context_page_num = 19; i = 2; @@ -832,7 +784,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload) kunmap(page); } -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, + unsigned long engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -858,19 +811,17 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id]; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_request *rq = workload->req; int event; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); + mutex_lock(&gvt->sched_lock); /* For the workload w/ request, needs to wait for the context * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. */ - if (workload->req) { - struct drm_i915_private *dev_priv = - workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = - dev_priv->engine[workload->ring_id]; + if (rq) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -886,8 +837,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_request_put(fetch_and_zero(&workload->req)); - if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { update_guest_context(workload); @@ -896,10 +845,13 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) INTEL_GVT_EVENT_MAX) intel_vgpu_trigger_virtual_event(vgpu, event); } - mutex_lock(&dev_priv->drm.struct_mutex); + /* unpin shadow ctx as the shadow_ctx update is done */ - intel_context_unpin(s->shadow_ctx, engine); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_lock(&rq->i915->drm.struct_mutex); + intel_context_unpin(rq->hw_context); + mutex_unlock(&rq->i915->drm.struct_mutex); + + i915_request_put(fetch_and_zero(&workload->req)); } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -928,7 +880,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact. 
**/ - clean_workloads(vgpu, ENGINE_MASK(ring_id)); + intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id)); } workload->complete(workload); @@ -939,7 +891,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) if (gvt->scheduler.need_reschedule) intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); - mutex_unlock(&gvt->lock); + mutex_unlock(&gvt->sched_lock); + mutex_unlock(&vgpu->vgpu_lock); } struct workload_thread_param { @@ -957,7 +910,8 @@ static int workload_thread(void *priv) struct intel_vgpu *vgpu = NULL; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) - || IS_KABYLAKE(gvt->dev_priv); + || IS_KABYLAKE(gvt->dev_priv) + || IS_BROXTON(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); kfree(p); @@ -991,9 +945,7 @@ static int workload_thread(void *priv) intel_uncore_forcewake_get(gvt->dev_priv, FORCEWAKE_ALL); - mutex_lock(&gvt->lock); ret = dispatch_workload(workload); - mutex_unlock(&gvt->lock); if (ret) { vgpu = workload->vgpu; @@ -1130,7 +1082,7 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, if (!s->active) return; - clean_workloads(vgpu, engine_mask); + intel_vgpu_clean_workloads(vgpu, engine_mask); s->ops->reset(vgpu, engine_mask); } @@ -1270,7 +1222,6 @@ alloc_workload(struct intel_vgpu *vgpu) atomic_set(&workload->shadow_ctx_active, 0); workload->status = -EINPROGRESS; - workload->shadowed = false; workload->vgpu = vgpu; return workload; @@ -1285,7 +1236,7 @@ static void read_guest_pdps(struct intel_vgpu *vgpu, u64 gpa; int i; - gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val); + gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); for (i = 0; i < 8; i++) intel_gvt_hypervisor_read_gpa(vgpu, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 6c644782193e..ca5529d0e48e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -83,7 +83,6 @@ struct intel_vgpu_workload { struct i915_request *req; /* if this workload has been dispatched to i915? */ bool dispatched; - bool shadowed; int status; struct intel_vgpu_mm *shadow_mm; @@ -159,4 +158,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload); +void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, + unsigned long engine_mask); + #endif diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 572a18c2bfb5..a4e8e3cf74fd 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -46,6 +46,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT; vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); @@ -58,6 +59,9 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu); + vgpu_vreg_t(vgpu, vgtif_reg(cursor_x_hot)) = UINT_MAX; + vgpu_vreg_t(vgpu, vgtif_reg(cursor_y_hot)) = UINT_MAX; + gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id); gvt_dbg_core("aperture base [GMADR] 0x%llx size 0x%llx\n", vgpu_aperture_gmadr_base(vgpu), vgpu_aperture_sz(vgpu)); @@ -218,27 +222,43 @@ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) * @vgpu: virtual GPU * * This function is called when user wants to deactivate a virtual GPU. 
- * All virtual GPU runtime information will be destroyed. + * The virtual GPU will be stopped. * */ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { - struct intel_gvt *gvt = vgpu->gvt; - - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); vgpu->active = false; if (atomic_read(&vgpu->submission.running_workload_num)) { - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_wait_vgpu_idle(vgpu); - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); } intel_vgpu_stop_schedule(vgpu); - intel_vgpu_dmabuf_cleanup(vgpu); - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); +} + +/** + * intel_gvt_release_vgpu - release a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to release a virtual GPU. + * The virtual GPU will be stopped and all runtime information will be + * destroyed. + * + */ +void intel_gvt_release_vgpu(struct intel_vgpu *vgpu) +{ + intel_gvt_deactivate_vgpu(vgpu); + + mutex_lock(&vgpu->vgpu_lock); + intel_vgpu_clean_workloads(vgpu, ALL_ENGINES); + intel_vgpu_dmabuf_cleanup(vgpu); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -252,14 +272,11 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); WARN(vgpu->active, "vGPU is still active!\n"); intel_gvt_debugfs_remove_vgpu(vgpu); - idr_remove(&gvt->vgpu_idr, vgpu->id); - if (idr_is_empty(&gvt->vgpu_idr)) - intel_gvt_clean_irq(gvt); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); intel_vgpu_clean_display(vgpu); @@ -269,10 +286,16 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) intel_vgpu_free_resource(vgpu); intel_vgpu_clean_mmio(vgpu); intel_vgpu_dmabuf_cleanup(vgpu); - vfree(vgpu); + mutex_unlock(&vgpu->vgpu_lock); + mutex_lock(&gvt->lock); + idr_remove(&gvt->vgpu_idr, vgpu->id); + if (idr_is_empty(&gvt->vgpu_idr)) + intel_gvt_clean_irq(gvt); intel_gvt_update_vgpu_types(gvt); mutex_unlock(&gvt->lock); + + vfree(vgpu); } #define IDLE_VGPU_IDR 0 @@ -298,6 +321,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) vgpu->id = IDLE_VGPU_IDR; vgpu->gvt = gvt; + mutex_init(&vgpu->vgpu_lock); for (i = 0; i < I915_NUM_ENGINES; i++) INIT_LIST_HEAD(&vgpu->submission.workload_q_head[i]); @@ -324,7 +348,10 @@ out_free_vgpu: */ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) { + mutex_lock(&vgpu->vgpu_lock); intel_vgpu_clean_sched_policy(vgpu); + mutex_unlock(&vgpu->vgpu_lock); + vfree(vgpu); } @@ -342,8 +369,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (!vgpu) return ERR_PTR(-ENOMEM); - mutex_lock(&gvt->lock); - ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU, GFP_KERNEL); if (ret < 0) @@ -353,6 +378,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; + mutex_init(&vgpu->vgpu_lock); + mutex_init(&vgpu->dmabuf_lock); INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); @@ -400,8 +427,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - mutex_unlock(&gvt->lock); - return vgpu; out_clean_sched_policy: @@ -424,7 +449,6 @@ out_clean_idr: idr_remove(&gvt->vgpu_idr, vgpu->id); out_free_vgpu: vfree(vgpu); - mutex_unlock(&gvt->lock); return ERR_PTR(ret); } @@ -456,12 +480,12 @@ struct intel_vgpu 
*intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz); + mutex_lock(&gvt->lock); vgpu = __intel_gvt_create_vgpu(gvt, ¶m); - if (IS_ERR(vgpu)) - return vgpu; - - /* calculate left instance change for types */ - intel_gvt_update_vgpu_types(gvt); + if (!IS_ERR(vgpu)) + /* calculate left instance change for types */ + intel_gvt_update_vgpu_types(gvt); + mutex_unlock(&gvt->lock); return vgpu; } @@ -473,7 +497,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, * @engine_mask: engines to reset for GT reset * * This function is called when user wants to reset a virtual GPU through - * device model reset or GT reset. The caller should hold the gvt lock. + * device model reset or GT reset. The caller should hold the vgpu lock. * * vGPU Device Model Level Reset (DMLR) simulates the PCI level reset to reset * the whole vGPU to default state as when it is created. This vGPU function @@ -513,9 +537,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, * scheduler when the reset is triggered by current vgpu. */ if (scheduler->current_vgpu == NULL) { - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_wait_vgpu_idle(vgpu); - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); } intel_vgpu_reset_submission(vgpu, resetting_eng); @@ -555,7 +579,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, */ void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->gvt->lock); + mutex_lock(&vgpu->vgpu_lock); intel_gvt_reset_vgpu_locked(vgpu, true, 0); - mutex_unlock(&vgpu->gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } |
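
Editor's note (not part of the patch): the new gvt_pin_guest_page() path pins a multi-page guest region (e.g. a 64K or 2M huge GTT page) one page at a time and requires every pinned pfn to follow the first one consecutively. The sketch below illustrates only that contiguity check in isolation; pfn_of() is a hypothetical stand-in for the per-page vfio_pin_pages() lookup and the whole snippet is an assumption-laden userspace toy, not driver code.

	#include <stdio.h>

	/* Toy backing map standing in for vfio_pin_pages(): gfn N -> pfn 0x1000 + N. */
	static unsigned long pfn_of(unsigned long gfn)
	{
		return 0x1000 + gfn;
	}

	/*
	 * Return 0 and set *base_pfn if gfn .. gfn + npages - 1 resolve to
	 * physically consecutive pfns, mirroring the base_pfn + npage != pfn
	 * check in gvt_pin_guest_page(). A real caller would unwind the pages
	 * pinned so far on failure.
	 */
	static int pin_contiguous(unsigned long gfn, int npages, unsigned long *base_pfn)
	{
		int i;

		for (i = 0; i < npages; i++) {
			unsigned long pfn = pfn_of(gfn + i);

			if (i == 0)
				*base_pfn = pfn;
			else if (*base_pfn + i != pfn)
				return -1;	/* region is not contiguous */
		}
		return 0;
	}

	int main(void)
	{
		unsigned long base;

		if (!pin_contiguous(16, 16, &base))	/* e.g. one 64K guest page */
			printf("base pfn 0x%lx\n", base);
		return 0;
	}

In the patch itself the failure path additionally calls gvt_unpin_guest_page() for the pages already pinned before returning, and the resulting base page plus size feed straight into dma_map_page() in gvt_dma_map_page().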