Diffstat (limited to 'drivers/gpu')
41 files changed, 870 insertions, 99 deletions
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c4c704e01961..eb009d3ab48f 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -48,6 +48,11 @@ #include "drm_internal.h" #include "drm_legacy.h" +#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#include <uapi/asm/mman.h> +#include <drm/drm_vma_manager.h> +#endif + /* from BKL pushdown */ DEFINE_MUTEX(drm_global_mutex); @@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags) return file; } EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile); + +#ifdef CONFIG_MMU +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * drm_addr_inflate() attempts to construct an aligned area by inflating + * the area size and skipping the unaligned start of the area. + * adapted from shmem_get_unmapped_area() + */ +static unsigned long drm_addr_inflate(unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags, + unsigned long huge_size) +{ + unsigned long offset, inflated_len; + unsigned long inflated_addr; + unsigned long inflated_offset; + + offset = (pgoff << PAGE_SHIFT) & (huge_size - 1); + if (offset && offset + len < 2 * huge_size) + return addr; + if ((addr & (huge_size - 1)) == offset) + return addr; + + inflated_len = len + huge_size - PAGE_SIZE; + if (inflated_len > TASK_SIZE) + return addr; + if (inflated_len < len) + return addr; + + inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len, + 0, flags); + if (IS_ERR_VALUE(inflated_addr)) + return addr; + if (inflated_addr & ~PAGE_MASK) + return addr; + + inflated_offset = inflated_addr & (huge_size - 1); + inflated_addr += offset - inflated_offset; + if (inflated_offset > offset) + inflated_addr += huge_size; + + if (inflated_addr > TASK_SIZE - len) + return addr; + + return inflated_addr; +} + +/** + * drm_get_unmapped_area() - Get an unused user-space virtual memory area + * suitable for huge page table entries. + * @file: The struct file representing the address space being mmap()'d. + * @uaddr: Start address suggested by user-space. + * @len: Length of the area. + * @pgoff: The page offset into the address space. + * @flags: mmap flags + * @mgr: The address space manager used by the drm driver. This argument can + * probably be removed at some point when all drivers use the same + * address space manager. + * + * This function attempts to find an unused user-space virtual memory area + * that can accommodate the size we want to map, and that is properly + * aligned to facilitate huge page table entries matching actual + * huge pages or huge page aligned memory in buffer objects. Buffer objects + * are assumed to start at huge page boundary pfns (io memory) or be + * populated by huge pages aligned to the start of the buffer object + * (system- or coherent memory). Adapted from shmem_get_unmapped_area. + * + * Return: aligned user-space address. + */ +unsigned long drm_get_unmapped_area(struct file *file, + unsigned long uaddr, unsigned long len, + unsigned long pgoff, unsigned long flags, + struct drm_vma_offset_manager *mgr) +{ + unsigned long addr; + unsigned long inflated_addr; + struct drm_vma_offset_node *node; + + if (len > TASK_SIZE) + return -ENOMEM; + + /* + * @pgoff is the file page-offset the huge page boundaries of + * which typically aligns to physical address huge page boundaries. + * That's not true for DRM, however, where physical address huge + * page boundaries instead are aligned with the offset from + * buffer object start. 
So adjust @pgoff to be the offset from + * buffer object start. + */ + drm_vma_offset_lock_lookup(mgr); + node = drm_vma_offset_lookup_locked(mgr, pgoff, 1); + if (node) + pgoff -= node->vm_node.start; + drm_vma_offset_unlock_lookup(mgr); + + addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags); + if (IS_ERR_VALUE(addr)) + return addr; + if (addr & ~PAGE_MASK) + return addr; + if (addr > TASK_SIZE - len) + return addr; + + if (len < HPAGE_PMD_SIZE) + return addr; + if (flags & MAP_FIXED) + return addr; + /* + * Our priority is to support MAP_SHARED mapped hugely; + * and support MAP_PRIVATE mapped hugely too, until it is COWed. + * But if caller specified an address hint, respect that as before. + */ + if (uaddr) + return addr; + + inflated_addr = drm_addr_inflate(addr, len, pgoff, flags, + HPAGE_PMD_SIZE); + + if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) && + len >= HPAGE_PUD_SIZE) + inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff, + flags, HPAGE_PUD_SIZE); + return inflated_addr; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +unsigned long drm_get_unmapped_area(struct file *file, + unsigned long uaddr, unsigned long len, + unsigned long pgoff, unsigned long flags, + struct drm_vma_offset_manager *mgr) +{ + return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +EXPORT_SYMBOL_GPL(drm_get_unmapped_area); +#endif /* CONFIG_MMU */ diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index bc6e208949e8..8981abe8b7c9 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -45,7 +45,6 @@ #include <linux/export.h> #include <linux/interval_tree_generic.h> #include <linux/seq_file.h> -#include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/stacktrace.h> @@ -367,11 +366,6 @@ next_hole(struct drm_mm *mm, struct drm_mm_node *node, enum drm_mm_insert_mode mode) { - /* Searching is slow; check if we ran out of time/patience */ - cond_resched(); - if (fatal_signal_pending(current)) - return NULL; - switch (mode) { default: case DRM_MM_INSERT_BEST: @@ -563,7 +557,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm, return 0; } - return signal_pending(current) ? -ERESTARTSYS : -ENOSPC; + return -ENOSPC; } EXPORT_SYMBOL(drm_mm_insert_node_in_range); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 9e065ad0658f..a3cc080a46c6 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -164,6 +164,7 @@ struct decode_info { #define OP_STATE_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x01) #define OP_STATE_SIP OP_3D_MEDIA(0x0, 0x1, 0x02) #define OP_3D_MEDIA_0_1_4 OP_3D_MEDIA(0x0, 0x1, 0x04) +#define OP_SWTESS_BASE_ADDRESS OP_3D_MEDIA(0x0, 0x1, 0x03) #define OP_3DSTATE_VF_STATISTICS_GM45 OP_3D_MEDIA(0x1, 0x0, 0x0B) @@ -967,18 +968,6 @@ static int cmd_handler_lri(struct parser_exec_state *s) { int i, ret = 0; int cmd_len = cmd_length(s); - u32 valid_len = CMD_LEN(1); - - /* - * Official intel docs are somewhat sloppy , check the definition of - * MI_LOAD_REGISTER_IMM. 
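The alignment trick in drm_addr_inflate() above is worth unpacking: the helper over-allocates by one huge page minus one base page, then slides the start of the area forward so it lands at the desired offset within a huge-page-sized stride. A minimal userspace sketch of just that arithmetic, assuming a 2 MiB (PMD-sized) huge page; align_within and HUGE_SZ are hypothetical names, not part of the patch:

#include <stdio.h>
#include <stdint.h>

#define HUGE_SZ (2UL << 20)    /* assumed PMD huge-page size: 2 MiB */

/* Given the start of an inflated reservation (len + HUGE_SZ - PAGE_SIZE
 * bytes), move forward to the first address whose offset within a huge
 * page equals 'offset' -- the same steps drm_addr_inflate() performs. */
static uintptr_t align_within(uintptr_t inflated_addr, uintptr_t offset)
{
    uintptr_t inflated_offset = inflated_addr & (HUGE_SZ - 1);
    uintptr_t aligned = inflated_addr + offset - inflated_offset;

    if (inflated_offset > offset)    /* moved backwards: bump one huge page */
        aligned += HUGE_SZ;
    return aligned;
}

int main(void)
{
    uintptr_t addr = 0x7f1234567000UL;    /* arbitrary page-aligned start */

    printf("inflated %#lx -> aligned %#lx\n", (unsigned long)addr,
           (unsigned long)align_within(addr, 0));
    return 0;
}

Compiled as a 64-bit program this prints an address rounded up to the next 2 MiB boundary, mirroring the inflated_offset/offset adjustment in the patch.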
- */ - #define MAX_VALID_LEN 127 - if ((cmd_len < valid_len) || (cmd_len > MAX_VALID_LEN)) { - gvt_err("len is not valid: len=%u valid_len=%u\n", - cmd_len, valid_len); - return -EFAULT; - } for (i = 1; i < cmd_len; i += 2) { if (IS_BROADWELL(s->engine->i915) && s->engine->id != RCS0) { @@ -2485,6 +2474,9 @@ static const struct cmd_info cmd_info[] = { {"OP_3D_MEDIA_0_1_4", OP_3D_MEDIA_0_1_4, F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_1(1), 8, NULL}, + {"OP_SWTESS_BASE_ADDRESS", OP_SWTESS_BASE_ADDRESS, + F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_2(1, 2), 3, NULL}, + {"3DSTATE_VS", OP_3DSTATE_VS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, {"3DSTATE_SF", OP_3DSTATE_SF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL}, diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6e5c9885d9fe..a83df2f84eb9 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -221,7 +221,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { @@ -241,7 +241,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { @@ -261,7 +261,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0182e2a5acff..2faf50e1b051 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -462,11 +462,14 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -/* ascendingly sorted */ +/* sorted in ascending order */ static i915_reg_t force_nonpriv_white_list[] = { + _MMIO(0xd80), GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) - PS_INVOCATION_COUNT,//_MMIO(0x2348) + CL_PRIMITIVES_COUNT, //_MMIO(0x2340) + PS_INVOCATION_COUNT, //_MMIO(0x2348) + PS_DEPTH_COUNT, //_MMIO(0x2350) GEN8_CS_CHICKEN1,//_MMIO(0x2580) _MMIO(0x2690), _MMIO(0x2694), @@ -491,6 +494,7 @@ static i915_reg_t force_nonpriv_white_list[] = { _MMIO(0xe18c), _MMIO(0xe48c), _MMIO(0xe5f4), + _MMIO(0x64844), }; /* a simple bsearch */ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1c95bf8cbed0..cb11c3184085 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -296,8 +296,8 @@ shadow_context_descriptor_update(struct intel_context *ce, * Update bits 0-11 of the context descriptor which includes flags * like GEN8_CTX_* cached in desc_template */ - desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); - desc |= workload->ctx_desc.addressing_mode << + desc &= ~(0x3ull << 
GEN8_CTX_ADDRESSING_MODE_SHIFT); + desc |= (u64)workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; ce->lrc_desc = desc; diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c index e8eef88a8382..ffdd447d8706 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/dac.c +++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c @@ -35,7 +35,8 @@ #include <subdev/bios/gpio.h> #include <subdev/gpio.h> -#include <subdev/timer.h> + +#include <nvif/timer.h> int nv04_dac_output_offset(struct drm_encoder *encoder) { diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c index 3fdfafa8b0ad..b674d68ef28a 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/hw.c +++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c @@ -26,6 +26,7 @@ #include "hw.h" #include <subdev/bios/pll.h> +#include <nvif/timer.h> #define CHIPSET_NFORCE 0x01a0 #define CHIPSET_NFORCE2 0x01f0 diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c index 00a85f1e1a4a..ee782151d332 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base507c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c @@ -23,6 +23,7 @@ #include <nvif/cl507c.h> #include <nvif/event.h> +#include <nvif/timer.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_fourcc.h> diff --git a/drivers/gpu/drm/nouveau/dispnv50/core507d.c b/drivers/gpu/drm/nouveau/dispnv50/core507d.c index e7fcfa6e6467..c5152c39c684 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core507d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core507d.c @@ -23,6 +23,7 @@ #include "head.h" #include <nvif/cl507d.h> +#include <nvif/timer.h> #include "nouveau_bo.h" diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c index 3b36dc8d36b2..c03cb987856b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c @@ -24,6 +24,8 @@ #include <nouveau_bo.h> +#include <nvif/timer.h> + void corec37d_wndw_owner(struct nv50_core *core) { diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c index 397143b639c6..8c5cf096f69b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c @@ -24,21 +24,36 @@ #include "head.h" #include <nvif/cl507a.h> +#include <nvif/timer.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_plane_helper.h> +bool +curs507a_space(struct nv50_wndw *wndw) +{ + nvif_msec(&nouveau_drm(wndw->plane.dev)->client.device, 2, + if (nvif_rd32(&wndw->wimm.base.user, 0x0008) >= 4) + return true; + ); + WARN_ON(1); + return false; +} + static void curs507a_update(struct nv50_wndw *wndw, u32 *interlock) { - nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000); + if (curs507a_space(wndw)) + nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000); } static void curs507a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { - nvif_wr32(&wndw->wimm.base.user, 0x0084, asyw->point.y << 16 | - asyw->point.x); + if (curs507a_space(wndw)) { + nvif_wr32(&wndw->wimm.base.user, 0x0084, asyw->point.y << 16 | + asyw->point.x); + } } const struct nv50_wimm_func diff --git a/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c index 23fb29d41efe..96dff4f09f57 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c +++ b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c @@ -25,14 +25,17 @@ static void cursc37a_update(struct nv50_wndw *wndw, u32 *interlock) { - nvif_wr32(&wndw->wimm.base.user, 0x0200, 
0x00000001); + if (curs507a_space(wndw)) + nvif_wr32(&wndw->wimm.base.user, 0x0200, 0x00000001); } static void cursc37a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { - nvif_wr32(&wndw->wimm.base.user, 0x0208, asyw->point.y << 16 | - asyw->point.x); + if (curs507a_space(wndw)) { + nvif_wr32(&wndw->wimm.base.user, 0x0208, asyw->point.y << 16 | + asyw->point.x); + } } static const struct nv50_wimm_func diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 4d1c58468dbc..6be9df1820c5 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -45,6 +45,7 @@ #include <nvif/cl5070.h> #include <nvif/cl507d.h> #include <nvif/event.h> +#include <nvif/timer.h> #include "nouveau_drv.h" #include "nouveau_dma.h" diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c index 2e68fc736fe1..4f7ce57f2036 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c @@ -24,6 +24,8 @@ #include <nouveau_bo.h> +#include <nvif/timer.h> + static void ovly827e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h index caf397475918..a7412b9d3a98 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h @@ -97,6 +97,7 @@ struct nv50_wimm_func { }; extern const struct nv50_wimm_func curs507a; +bool curs507a_space(struct nv50_wndw *); int wndwc37e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h index 25d969dcf67d..c2a572c67a76 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/device.h +++ b/drivers/gpu/drm/nouveau/include/nvif/device.h @@ -23,27 +23,6 @@ int nvif_device_init(struct nvif_object *, u32 handle, s32 oclass, void *, u32, void nvif_device_fini(struct nvif_device *); u64 nvif_device_time(struct nvif_device *); -/* Delay based on GPU time (ie. PTIMER). - * - * Will return -ETIMEDOUT unless the loop was terminated with 'break', - * where it will return the number of nanoseconds taken instead. - */ -#define nvif_nsec(d,n,cond...) ({ \ - struct nvif_device *_device = (d); \ - u64 _nsecs = (n), _time0 = nvif_device_time(_device); \ - s64 _taken = 0; \ - \ - do { \ - cond \ - } while (_taken = nvif_device_time(_device) - _time0, _taken < _nsecs);\ - \ - if (_taken >= _nsecs) \ - _taken = -ETIMEDOUT; \ - _taken; \ -}) -#define nvif_usec(d,u,cond...) nvif_nsec((d), (u) * 1000, ##cond) -#define nvif_msec(d,m,cond...) nvif_usec((d), (m) * 1000, ##cond) - /*XXX*/ #include <subdev/bios.h> #include <subdev/fb.h> diff --git a/drivers/gpu/drm/nouveau/include/nvif/timer.h b/drivers/gpu/drm/nouveau/include/nvif/timer.h new file mode 100644 index 000000000000..57587a985c4b --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/timer.h @@ -0,0 +1,35 @@ +#ifndef __NVIF_TIMER_H__ +#define __NVIF_TIMER_H__ +#include <nvif/os.h> + +struct nvif_timer_wait { + struct nvif_device *device; + u64 limit; + u64 time0; + u64 time1; + int reads; +}; + +void nvif_timer_wait_init(struct nvif_device *, u64 nsec, + struct nvif_timer_wait *); +s64 nvif_timer_wait_test(struct nvif_timer_wait *); + +/* Delay based on GPU time (ie. PTIMER). 
+ * + * Will return -ETIMEDOUT unless the loop was terminated with 'break', + * where it will return the number of nanoseconds taken instead. + */ +#define nvif_nsec(d,n,cond...) ({ \ + struct nvif_timer_wait _wait; \ + s64 _taken = 0; \ + \ + nvif_timer_wait_init((d), (n), &_wait); \ + do { \ + cond \ + } while ((_taken = nvif_timer_wait_test(&_wait)) >= 0); \ + \ + _taken; \ +}) +#define nvif_usec(d,u,cond...) nvif_nsec((d), (u) * 1000, ##cond) +#define nvif_msec(d,m,cond...) nvif_usec((d), (m) * 1000, ##cond) +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/user.h b/drivers/gpu/drm/nouveau/include/nvif/user.h index 03c11826b693..6825574d93c2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/user.h +++ b/drivers/gpu/drm/nouveau/include/nvif/user.h @@ -10,6 +10,7 @@ struct nvif_user { struct nvif_user_func { void (*doorbell)(struct nvif_user *, u32 token); + u64 (*time)(struct nvif_user *); }; int nvif_user_init(struct nvif_device *); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2b4b21b02e40..c40f127de3d0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1494,8 +1494,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) ret = nvif_object_map_handle(&mem->mem.object, &args, argc, &handle, &length); - if (ret != 1) - return ret ? ret : -EINVAL; + if (ret != 1) { + if (WARN_ON(ret == 0)) + return -EINVAL; + if (ret == -ENOSPC) + return -EAGAIN; + return ret; + } reg->bus.base = 0; reg->bus.offset = handle; diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 7dfbbbc1beea..15a3d40edf02 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -222,22 +222,18 @@ nouveau_drm_debugfs_init(struct drm_minor *minor) { struct nouveau_drm *drm = nouveau_drm(minor->dev); struct dentry *dentry; - int i, ret; + int i; for (i = 0; i < ARRAY_SIZE(nouveau_debugfs_files); i++) { - dentry = debugfs_create_file(nouveau_debugfs_files[i].name, - S_IRUGO | S_IWUSR, - minor->debugfs_root, minor->dev, - nouveau_debugfs_files[i].fops); - if (!dentry) - return -ENOMEM; + debugfs_create_file(nouveau_debugfs_files[i].name, + S_IRUGO | S_IWUSR, + minor->debugfs_root, minor->dev, + nouveau_debugfs_files[i].fops); } - ret = drm_debugfs_create_files(nouveau_debugfs_list, - NOUVEAU_DEBUGFS_ENTRIES, - minor->debugfs_root, minor); - if (ret) - return ret; + drm_debugfs_create_files(nouveau_debugfs_list, + NOUVEAU_DEBUGFS_ENTRIES, + minor->debugfs_root, minor); /* Set the size of the vbios since we know it, and it's confusing to * userspace if it wants to seek() but the file has a length of 0 diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 6b1629c14dd7..ca4087f5a15b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -618,6 +618,64 @@ nouveau_drm_device_fini(struct drm_device *dev) kfree(drm); } +/* + * On some Intel PCIe bridge controllers doing a + * D0 -> D3hot -> D3cold -> D0 sequence causes Nvidia GPUs to not reappear. + * Skipping the intermediate D3hot step seems to make it work again. This is + * probably caused by not meeting the expectation the involved AML code has + * when the GPU is put into D3hot state before invoking it. 
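The replacement nvif_nsec()/nvif_usec()/nvif_msec() macros above are GNU statement expressions: the caller's polling body, including any break, is spliced into a timed do/while loop, and the whole expression yields either the time taken or a timeout indication. A self-contained userspace analogue, with CLOCK_MONOTONIC standing in for the GPU's PTIMER; poll_nsec() and now_ns() are hypothetical names:

#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Yields the nanoseconds elapsed when 'cond' breaks out of the loop, or
 * -1 when the budget of 'n' nanoseconds is exhausted -- loosely the
 * nvif_nsec() contract, minus the stalled-timer detection. */
#define poll_nsec(n, cond...) ({                                \
    long long _limit = (n), _t0 = now_ns(), _taken = 0;         \
    do { cond } while ((_taken = now_ns() - _t0) < _limit);     \
    if (_taken >= _limit)                                       \
        _taken = -1;                                            \
    _taken;                                                     \
})

int main(void)
{
    int spins = 0;
    long long r = poll_nsec(1000000,    /* 1 ms budget */
        if (++spins > 3)
            break;
    );

    printf("spins=%d result=%lld\n", spins, r);
    return 0;
}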
+ * + * This leads to various manifestations of this issue: + * - AML code execution to power on the GPU hits an infinite loop (as the + * code waits on device memory to change). + * - kernel crashes, as all PCI reads return -1, which most code isn't able + * to handle well enough. + * + * In all cases dmesg will contain at least one line like this: + * 'nouveau 0000:01:00.0: Refused to change power state, currently in D3' + * followed by a lot of nouveau timeouts. + * + * The \_SB.PCI0.PEG0.PG00._OFF code deeper down writes bit 0x80 to the + * undocumented PCI config space register 0x248 of the Intel PCIe bridge + * controller (0x1901) in order to change the state of the PCIe link between + * the PCIe port and the GPU. There are alternative code paths using other + * registers, which seem to work fine (executed pre Windows 8): + * - 0xbc bit 0x20 (publicly available documentation claims 'reserved') + * - 0xb0 bit 0x10 (link disable) + * Changing the conditions inside the firmware by poking into the relevant + * addresses does resolve the issue, but it seemed to be ACPI private memory + * and not any device accessible memory at all, so there is no portable way of + * changing the conditions. + * On an XPS 9560 that means bits [0,3] on \CPEX need to be cleared. + * + * The only systems where this behavior can be seen are hybrid graphics laptops + * with a secondary Nvidia Maxwell, Pascal or Turing GPU. It's unclear whether + * this issue only occurs in combination with the listed Intel PCIe bridge + * controllers and the mentioned GPUs or other devices as well. + * + * Documentation on the PCIe bridge controller can be found in the + * "7th Generation Intel® Processor Families for H Platforms Datasheet Volume 2" + * Section "12 PCI Express* Controller (x16) Registers" + */ + +static void quirk_broken_nv_runpm(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + struct nouveau_drm *drm = nouveau_drm(dev); + struct pci_dev *bridge = pci_upstream_bridge(pdev); + + if (!bridge || bridge->vendor != PCI_VENDOR_ID_INTEL) + return; + + switch (bridge->device) { + case 0x1901: + drm->old_pm_cap = pdev->pm_cap; + pdev->pm_cap = 0; + NV_INFO(drm, "Disabling PCI power management to avoid bug\n"); + break; + } +} + static int nouveau_drm_probe(struct pci_dev *pdev, const struct pci_device_id *pent) { @@ -699,6 +757,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, if (ret) goto fail_drm_dev_init; + quirk_broken_nv_runpm(pdev); return 0; fail_drm_dev_init: @@ -734,7 +793,11 @@ static void nouveau_drm_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + struct nouveau_drm *drm = nouveau_drm(dev); + /* revert our workaround */ + if (drm->old_pm_cap) + pdev->pm_cap = drm->old_pm_cap; nouveau_drm_device_remove(dev); pci_disable_device(pdev); } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index c2c332fbde97..2a6519737800 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -140,6 +140,8 @@ struct nouveau_drm { struct list_head clients; + u8 old_pm_cap; + struct { struct agp_bridge_data *bridge; u32 base; diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index df9bf1fd1bc0..8dfa5cb74826 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -171,6 +171,11 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, mm = get_task_mm(current); down_read(&mm->mmap_sem); + if
(!cli->svm.svmm) { + up_read(&mm->mmap_sem); + return -EINVAL; + } + for (addr = args->va_start, end = args->va_start + size; addr < end;) { struct vm_area_struct *vma; unsigned long next; @@ -179,6 +184,7 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, if (!vma) break; + addr = max(addr, vma->vm_start); next = min(vma->vm_end, end); /* This is a best effort so we ignore errors */ nouveau_dmem_migrate_vma(cli->drm, vma, addr, next); @@ -657,9 +663,6 @@ nouveau_svm_fault(struct nvif_notify *notify) limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT); if (start < svmm->unmanaged.limit) limit = min_t(u64, limit, svmm->unmanaged.start); - else - if (limit > svmm->unmanaged.start) - start = max_t(u64, start, svmm->unmanaged.limit); SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); mm = svmm->notifier.mm; diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild index 50d583d63807..f194d354c1f5 100644 --- a/drivers/gpu/drm/nouveau/nvif/Kbuild +++ b/drivers/gpu/drm/nouveau/nvif/Kbuild @@ -8,6 +8,7 @@ nvif-y += nvif/fifo.o nvif-y += nvif/mem.o nvif-y += nvif/mmu.o nvif-y += nvif/notify.o +nvif-y += nvif/timer.o nvif-y += nvif/vmm.o # Usermode classes diff --git a/drivers/gpu/drm/nouveau/nvif/device.c b/drivers/gpu/drm/nouveau/nvif/device.c index 1ec101ba3b42..0e92db44bbc8 100644 --- a/drivers/gpu/drm/nouveau/nvif/device.c +++ b/drivers/gpu/drm/nouveau/nvif/device.c @@ -27,11 +27,15 @@ u64 nvif_device_time(struct nvif_device *device) { - struct nv_device_time_v0 args = {}; - int ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_TIME, - &args, sizeof(args)); - WARN_ON_ONCE(ret != 0); - return args.time; + if (!device->user.func) { + struct nv_device_time_v0 args = {}; + int ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_TIME, + &args, sizeof(args)); + WARN_ON_ONCE(ret != 0); + return args.time; + } + + return device->user.func->time(&device->user); } void diff --git a/drivers/gpu/drm/nouveau/nvif/timer.c b/drivers/gpu/drm/nouveau/nvif/timer.c new file mode 100644 index 000000000000..602c1a258d10 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/timer.c @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
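Two fixes ride along in the nouveau_svmm_bind() hunk above: the ioctl now fails early when no SVMM was ever allocated for the client, and the migration loop clamps addr to vma->vm_start, because find_vma() only guarantees the returned VMA ends above addr, not that it contains it. The clamping logic, reduced to plain C over hypothetical ranges:

#include <stdio.h>

struct range { unsigned long start, end; };    /* stand-in for a vma */

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    struct range vmas[] = { { 0x1000, 0x4000 }, { 0x6000, 0x9000 } };
    unsigned long addr = 0x0000, end = 0x8000;

    for (unsigned int i = 0; i < 2 && addr < end; i++) {
        unsigned long next = MIN(vmas[i].end, end);

        /* the patch's 'addr = max(addr, vma->vm_start)' */
        addr = MAX(addr, vmas[i].start);
        printf("migrate [%#lx, %#lx)\n", addr, next);
        addr = next;
    }
    return 0;
}

Without the clamp, the first iteration would request a migration starting at 0x0, an address below the VMA that was actually returned.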
+ */ +#include <nvif/timer.h> +#include <nvif/device.h> + +s64 +nvif_timer_wait_test(struct nvif_timer_wait *wait) +{ + u64 time = nvif_device_time(wait->device); + + if (wait->reads == 0) { + wait->time0 = time; + wait->time1 = time; + } + + if (wait->time1 == time) { + if (WARN_ON(wait->reads++ == 16)) + return -ETIMEDOUT; + } else { + wait->time1 = time; + wait->reads = 1; + } + + if (wait->time1 - wait->time0 > wait->limit) + return -ETIMEDOUT; + + return wait->time1 - wait->time0; +} + +void +nvif_timer_wait_init(struct nvif_device *device, u64 nsec, + struct nvif_timer_wait *wait) +{ + wait->device = device; + wait->limit = nsec; + wait->reads = 0; +} diff --git a/drivers/gpu/drm/nouveau/nvif/userc361.c b/drivers/gpu/drm/nouveau/nvif/userc361.c index 19f9958e7e01..1116f871b272 100644 --- a/drivers/gpu/drm/nouveau/nvif/userc361.c +++ b/drivers/gpu/drm/nouveau/nvif/userc361.c @@ -21,6 +21,19 @@ */ #include <nvif/user.h> +static u64 +nvif_userc361_time(struct nvif_user *user) +{ + u32 hi, lo; + + do { + hi = nvif_rd32(&user->object, 0x084); + lo = nvif_rd32(&user->object, 0x080); + } while (hi != nvif_rd32(&user->object, 0x084)); + + return ((u64)hi << 32 | lo); +} + static void nvif_userc361_doorbell(struct nvif_user *user, u32 token) { @@ -30,4 +43,5 @@ nvif_userc361_doorbell(struct nvif_user *user, u32 token) const struct nvif_user_func nvif_userc361 = { .doorbell = nvif_userc361_doorbell, + .time = nvif_userc361_time, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index dd8f85b8b3a7..f2f5636efac4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1981,8 +1981,34 @@ gf100_gr_init_(struct nvkm_gr *base) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &base->engine.subdev; + struct nvkm_device *device = subdev->device; + bool reset = device->chipset == 0x137 || device->chipset == 0x138; u32 ret; + /* On certain GP107/GP108 boards, we trigger a weird issue where + * GR will stop responding to PRI accesses after we've asked the + * SEC2 RTOS to boot the GR falcons. This happens with far more + * frequency when cold-booting a board (ie. returning from D3). + * + * The root cause for this is not known and has proven difficult + * to isolate, with many avenues being dead-ends. + * + * A workaround was discovered by Karol, whereby putting GR into + * reset for an extended period right before initialisation + * prevents the problem from occurring. + * + * XXX: As RM does not require any such workaround, this is more + * of a hack than a true fix.
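nvif_userc361_time() above assembles a 64-bit timestamp from two 32-bit registers, re-reading the high word to catch a carry between the two reads -- a standard split-register idiom. The same pattern in userspace form, with volatile variables as hypothetical stand-ins for the 0x084/0x080 MMIO locations:

#include <stdio.h>
#include <stdint.h>

static volatile uint32_t reg_hi, reg_lo;    /* fake MMIO registers */

static uint64_t read_time64(void)
{
    uint32_t hi, lo;

    do {
        hi = reg_hi;
        lo = reg_lo;
    } while (hi != reg_hi);    /* high word changed: low read was stale */

    return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
    reg_hi = 1;
    reg_lo = 0x80000000u;
    printf("time = %#llx\n", (unsigned long long)read_time64());
    return 0;
}

nvif_timer_wait_test() above layers a second heuristic on top of this clock: sixteen consecutive reads returning the same value are treated as a dead timer and reported as -ETIMEDOUT rather than spinning forever.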
+ */ + reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset); + if (reset) { + nvkm_mask(device, 0x000200, 0x00001000, 0x00000000); + nvkm_rd32(device, 0x000200); + msleep(50); + nvkm_mask(device, 0x000200, 0x00001000, 0x00001000); + nvkm_rd32(device, 0x000200); + } + nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false); ret = nvkm_falcon_get(&gr->fecs.falcon, subdev); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 0ce81b1f36af..3ad828eaefe1 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -361,7 +361,6 @@ static int panel_dpi_probe(struct device *dev, struct panel_desc *desc; unsigned int bus_flags; struct videomode vm; - const char *mapping; int ret; np = dev->of_node; @@ -386,16 +385,6 @@ static int panel_dpi_probe(struct device *dev, of_property_read_u32(np, "width-mm", &desc->size.width); of_property_read_u32(np, "height-mm", &desc->size.height); - of_property_read_string(np, "data-mapping", &mapping); - if (!strcmp(mapping, "rgb24")) - desc->bus_format = MEDIA_BUS_FMT_RGB888_1X24; - else if (!strcmp(mapping, "rgb565")) - desc->bus_format = MEDIA_BUS_FMT_RGB565_1X16; - else if (!strcmp(mapping, "bgr666")) - desc->bus_format = MEDIA_BUS_FMT_RGB666_1X18; - else if (!strcmp(mapping, "lvds666")) - desc->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI; - /* Extract bus_flags from display_timing */ bus_flags = 0; vm.flags = timing->flags; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 389128b8c4dd..0af14835504c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_vm_reserve); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults + * @vmf: Fault data + * @bo: The buffer object + * @page_offset: Page offset from bo start + * @fault_page_size: The size of the fault in pages. + * @pgprot: The page protections. + * Does additional checking whether it's possible to insert a PUD or PMD + * pfn and performs the insertion. + * + * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if + * a huge fault was not possible, or on insertion error. + */ +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + pgoff_t i; + vm_fault_t ret; + unsigned long pfn; + pfn_t pfnt; + struct ttm_tt *ttm = bo->ttm; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + /* Fault should not cross bo boundary. */ + page_offset &= ~(fault_page_size - 1); + if (page_offset + fault_page_size > bo->num_pages) + goto out_fallback; + + if (bo->mem.bus.is_iomem) + pfn = ttm_bo_io_mem_pfn(bo, page_offset); + else + pfn = page_to_pfn(ttm->pages[page_offset]); + + /* pfn must be fault_page_size aligned. */ + if ((pfn & (fault_page_size - 1)) != 0) + goto out_fallback; + + /* Check that memory is contiguous. 
*/ + if (!bo->mem.bus.is_iomem) { + for (i = 1; i < fault_page_size; ++i) { + if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) + goto out_fallback; + } + } else if (bo->bdev->driver->io_mem_pfn) { + for (i = 1; i < fault_page_size; ++i) { + if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i) + goto out_fallback; + } + } + + pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); + if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); +#endif + else + WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); + + if (ret != VM_FAULT_NOPAGE) + goto out_fallback; + + return VM_FAULT_NOPAGE; +out_fallback: + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; +} +#else +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + return VM_FAULT_FALLBACK; +} +#endif + /** * ttm_bo_vm_fault_reserved - TTM fault helper * @vmf: The struct vm_fault given as argument to the fault callback @@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); * @num_prefault: Maximum number of prefault pages. The caller may want to * specify this based on madvice settings and the size of the GPU object * backed by the memory. + * @fault_page_size: The size of the fault in pages. * * This function inserts one or more page table entries pointing to the * memory backing the buffer object, and then returns a return code @@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); */ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgprot_t prot, - pgoff_t num_prefault) + pgoff_t num_prefault, + pgoff_t fault_page_size) { struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = vma->vm_private_data; @@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, prot = pgprot_decrypted(prot); } + /* We don't prefault on huge faults. Yet. */ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) { + ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset, + fault_page_size, prot); + goto out_io_unlock; + } + /* * Speculatively prefault a number of pages. Only error on * first page. @@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return ret; prot = vma->vm_page_prot; - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; @@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) } EXPORT_SYMBOL(ttm_bo_vm_fault); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting? + * @prot: The page protection value + * + * Return: true if @prot is write-protecting. false otherwise. + */ +static bool ttm_pgprot_is_wrprotecting(pgprot_t prot) +{ + /* + * This is meant to say "pgprot_wrprotect(prot) == prot" in a generic + * way. Unfortunately there is no generic pgprot_wrprotect. 
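ttm_bo_vm_insert_huge() above only maps a huge entry when the start pfn is aligned to the fault size and, per the loops just shown, every backing page is physically contiguous. Those two preconditions in isolation, with hypothetical names and data:

#include <stdio.h>
#include <stdbool.h>

static bool can_map_huge(const unsigned long *pfns, unsigned long npages,
                         unsigned long fault_page_size)
{
    unsigned long i;

    if (pfns[0] & (fault_page_size - 1))    /* start pfn must be aligned */
        return false;
    for (i = 1; i < fault_page_size && i < npages; i++)
        if (pfns[i] != pfns[0] + i)         /* pages must be contiguous */
            return false;
    return true;
}

int main(void)
{
    unsigned long ok[4]  = { 0x1000, 0x1001, 0x1002, 0x1003 };
    unsigned long bad[4] = { 0x1000, 0x1001, 0x2000, 0x2001 };

    printf("contiguous:    %d\n", can_map_huge(ok, 4, 4));
    printf("discontiguous: %d\n", can_map_huge(bad, 4, 4));
    return 0;
}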
+ */ + return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) == + pgprot_val(prot); +} + +static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) +{ + struct vm_area_struct *vma = vmf->vma; + pgprot_t prot; + struct ttm_buffer_object *bo = vma->vm_private_data; + vm_fault_t ret; + pgoff_t fault_page_size = 0; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + switch (pe_size) { + case PE_SIZE_PMD: + fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; + break; +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + case PE_SIZE_PUD: + fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; + break; +#endif + default: + WARN_ON_ONCE(1); + return VM_FAULT_FALLBACK; + } + + /* Fallback on write dirty-tracking or COW */ + if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot)) + return VM_FAULT_FALLBACK; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + prot = vm_get_page_prot(vma->vm_flags); + ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + dma_resv_unlock(bo->base.resv); + + return ret; +} +#endif + void ttm_bo_vm_open(struct vm_area_struct *vma) { struct ttm_buffer_object *bo = vma->vm_private_data; @@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = ttm_bo_vm_access, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + .huge_fault = ttm_bo_vm_huge_fault, +#endif }; static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev, diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index 8512d970a09f..ac8f75db2ecd 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -41,6 +41,10 @@ static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!vbox_check_supported(VBE_DISPI_ID_HGSMI)) return -ENODEV; + ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "vboxvideodrmfb"); + if (ret) + return ret; + vbox = kzalloc(sizeof(*vbox), GFP_KERNEL); if (!vbox) return -ENOMEM; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index cea18dc15f77..340719238753 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -681,11 +681,23 @@ static enum drm_mode_status vc4_hdmi_encoder_mode_valid(struct drm_encoder *crtc, const struct drm_display_mode *mode) { - /* HSM clock must be 108% of the pixel clock. Additionally, - * the AXI clock needs to be at least 25% of pixel clock, but - * HSM ends up being the limiting factor. + /* + * As stated in RPi's vc4 firmware, "HDMI state machine (HSM) clock must + * be faster than pixel clock, infinitesimally faster, tested in + * simulation. Otherwise, exact value is unimportant for HDMI + * operation." This conflicts with bcm2835's vc4 documentation, which + * states HSM's clock has to be at least 108% of the pixel clock. + * + * Real life tests reveal that vc4's firmware statement holds up, and + * users are able to use pixel clocks closer to HSM's, namely for + * 1920x1200@60Hz. So it was decided to leave a 1% margin between + * both clocks, which for RPi0-3 implies a maximum pixel clock of + * 162MHz. + * + * Additionally, the AXI clock needs to be at least 25% of + * pixel clock, but HSM ends up being the limiting factor.
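ttm_bo_vm_huge_fault() above first converts the page-entry size of the fault into a page count for ttm_bo_vm_fault_reserved(). The mapping, assuming 4 KiB base pages so that a PMD covers 512 pages and a PUD 262144; the constants here are illustrative, not taken from any particular architecture's headers:

#include <stdio.h>

#define PAGE_SHIFT      12
#define HPAGE_PMD_SIZE  (1UL << 21)    /* assumed 2 MiB */
#define HPAGE_PUD_SIZE  (1UL << 30)    /* assumed 1 GiB */

enum page_entry_size { PE_SIZE_PTE, PE_SIZE_PMD, PE_SIZE_PUD };

static unsigned long fault_pages(enum page_entry_size pe_size)
{
    switch (pe_size) {
    case PE_SIZE_PMD:
        return HPAGE_PMD_SIZE >> PAGE_SHIFT;    /* 512 */
    case PE_SIZE_PUD:
        return HPAGE_PUD_SIZE >> PAGE_SHIFT;    /* 262144 */
    default:
        return 1;    /* unknown size: fall back to one page */
    }
}

int main(void)
{
    printf("PMD fault covers %lu pages\n", fault_pages(PE_SIZE_PMD));
    printf("PUD fault covers %lu pages\n", fault_pages(PE_SIZE_PUD));
    return 0;
}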
*/ - if (mode->clock > HSM_CLOCK_FREQ / (1000 * 108 / 100)) + if (mode->clock > HSM_CLOCK_FREQ / (1000 * 101 / 100)) return MODE_CLOCK_HIGH; return MODE_OK; diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index 5c3515e8cce1..31f85f09f1fc 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \ ttm_object.o ttm_lock.o +vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 71e45b568511..c2247a893ed4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static unsigned long +vmw_get_unmapped_area(struct file *file, unsigned long uaddr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct drm_file *file_priv = file->private_data; + struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); + + return drm_get_unmapped_area(file, uaddr, len, pgoff, flags, + &dev_priv->vma_manager); +} + static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, void *ptr) { @@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = { .compat_ioctl = vmw_compat_ioctl, #endif .llseek = noop_llseek, + .get_unmapped_area = vmw_get_unmapped_area, }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5ddbcb9f6df4..8cdcd6e5f9e1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1000,6 +1000,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma); extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, size_t gran); + /** * TTM buffer object driver - vmwgfx_ttm_buffer.c */ @@ -1510,6 +1511,17 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size); +#endif + +/* Transparent hugepage support - vmwgfx_thp.c */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern const struct ttm_mem_type_manager_func vmw_thp_func; +#else +#define vmw_thp_func ttm_bo_manager_func +#endif /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 60cfbfadd3f2..d4d66532f9c9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -473,11 +473,11 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) * a lot of unnecessary write faults. 
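The reworked vc4 check above keeps the pixel clock roughly 1% below the HSM clock. Since mode->clock is in kHz, dividing the HSM rate in Hz by 1000 * 101 / 100 (which integer arithmetic evaluates to 1010) gives the highest permissible pixel clock. A standalone sketch of that arithmetic, assuming the 163682864 Hz HSM rate the driver defines:

#include <stdio.h>

#define HSM_CLOCK_FREQ 163682864UL    /* Hz, as used by vc4 */

int main(void)
{
    unsigned long max_khz = HSM_CLOCK_FREQ / (1000 * 101 / 100);

    printf("max pixel clock: %lu kHz (~%lu MHz)\n",
           max_khz, max_khz / 1000);
    return 0;
}

That works out to about 162 MHz, matching the figure quoted in the comment for RPi0-3.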
*/ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE) - prot = vma->vm_page_prot; + prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); else prot = vm_get_page_prot(vma->vm_flags); - ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault); + ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1); if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; @@ -486,3 +486,75 @@ out_unlock: return ret; } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct vmw_buffer_object *vbo = + container_of(bo, struct vmw_buffer_object, base); + pgprot_t prot; + vm_fault_t ret; + pgoff_t fault_page_size; + bool write = vmf->flags & FAULT_FLAG_WRITE; + bool is_cow_mapping = + (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + + switch (pe_size) { + case PE_SIZE_PMD: + fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; + break; +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + case PE_SIZE_PUD: + fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; + break; +#endif + default: + WARN_ON_ONCE(1); + return VM_FAULT_FALLBACK; + } + + /* Always do write dirty-tracking and COW on PTE level. */ + if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping)) + return VM_FAULT_FALLBACK; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (vbo->dirty) { + pgoff_t allowed_prefault; + unsigned long page_offset; + + page_offset = vmf->pgoff - + drm_vma_node_start(&bo->base.vma_node); + if (page_offset >= bo->num_pages || + vmw_resources_clean(vbo, page_offset, + page_offset + PAGE_SIZE, + &allowed_prefault)) { + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + + /* + * Write protect, so we get a new fault on write, and can + * split. + */ + prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); + } else { + prot = vm_get_page_prot(vma->vm_flags); + } + + ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +out_unlock: + dma_resv_unlock(bo->base.resv); + + return ret; +} +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c new file mode 100644 index 000000000000..b7c816ba7166 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Huge page-table-entry support for IO memory. + * + * Copyright (C) 2007-2019 VMware, Inc. All rights reserved. + */ +#include "vmwgfx_drv.h" +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +/** + * struct vmw_thp_manager - Range manager implementing huge page alignment + * + * @mm: The underlying range manager. Protected by @lock. + * @lock: Manager lock.
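vmw_bo_vm_huge_fault() above refuses huge entries for copy-on-write mappings, detected by VM_MAYWRITE being set while VM_SHARED is not, because COW has to be resolved at PTE granularity. The test in isolation; the flag values below are illustrative, not the kernel's actual bit assignments:

#include <stdio.h>
#include <stdbool.h>

#define VM_SHARED   0x1u    /* illustrative values */
#define VM_MAYWRITE 0x2u

static bool is_cow_mapping(unsigned int vm_flags)
{
    /* private mapping that may become writable => copy-on-write */
    return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}

int main(void)
{
    printf("shared+maywrite:  %d\n", is_cow_mapping(VM_SHARED | VM_MAYWRITE));
    printf("private+maywrite: %d\n", is_cow_mapping(VM_MAYWRITE));
    return 0;
}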
+ */ +struct vmw_thp_manager { + struct drm_mm mm; + spinlock_t lock; +}; + +static int vmw_thp_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long align_pages, + const struct ttm_place *place, + struct ttm_mem_reg *mem, + unsigned long lpfn, + enum drm_mm_insert_mode mode) +{ + if (align_pages >= mem->page_alignment && + (!mem->page_alignment || align_pages % mem->page_alignment == 0)) { + return drm_mm_insert_node_in_range(mm, node, + mem->num_pages, + align_pages, 0, + place->fpfn, lpfn, mode); + } + + return -ENOSPC; +} + +static int vmw_thp_get_node(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) +{ + struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv; + struct drm_mm *mm = &rman->mm; + struct drm_mm_node *node; + unsigned long align_pages; + unsigned long lpfn; + enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST; + int ret; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + lpfn = place->lpfn; + if (!lpfn) + lpfn = man->size; + + mode = DRM_MM_INSERT_BEST; + if (place->flags & TTM_PL_FLAG_TOPDOWN) + mode = DRM_MM_INSERT_HIGH; + + spin_lock(&rman->lock); + if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) { + align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT); + if (mem->num_pages >= align_pages) { + ret = vmw_thp_insert_aligned(mm, node, align_pages, + place, mem, lpfn, mode); + if (!ret) + goto found_unlock; + } + } + + align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT); + if (mem->num_pages >= align_pages) { + ret = vmw_thp_insert_aligned(mm, node, align_pages, place, mem, + lpfn, mode); + if (!ret) + goto found_unlock; + } + + ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages, + mem->page_alignment, 0, + place->fpfn, lpfn, mode); +found_unlock: + spin_unlock(&rman->lock); + + if (unlikely(ret)) { + kfree(node); + } else { + mem->mm_node = node; + mem->start = node->start; + } + + return 0; +} + + + +static void vmw_thp_put_node(struct ttm_mem_type_manager *man, + struct ttm_mem_reg *mem) +{ + struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv; + + if (mem->mm_node) { + spin_lock(&rman->lock); + drm_mm_remove_node(mem->mm_node); + spin_unlock(&rman->lock); + + kfree(mem->mm_node); + mem->mm_node = NULL; + } +} + +static int vmw_thp_init(struct ttm_mem_type_manager *man, + unsigned long p_size) +{ + struct vmw_thp_manager *rman; + + rman = kzalloc(sizeof(*rman), GFP_KERNEL); + if (!rman) + return -ENOMEM; + + drm_mm_init(&rman->mm, 0, p_size); + spin_lock_init(&rman->lock); + man->priv = rman; + return 0; +} + +static int vmw_thp_takedown(struct ttm_mem_type_manager *man) +{ + struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv; + struct drm_mm *mm = &rman->mm; + + spin_lock(&rman->lock); + if (drm_mm_clean(mm)) { + drm_mm_takedown(mm); + spin_unlock(&rman->lock); + kfree(rman); + man->priv = NULL; + return 0; + } + spin_unlock(&rman->lock); + return -EBUSY; +} + +static void vmw_thp_debug(struct ttm_mem_type_manager *man, + struct drm_printer *printer) +{ + struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv; + + spin_lock(&rman->lock); + drm_mm_print(&rman->mm, printer); + spin_unlock(&rman->lock); +} + +const struct ttm_mem_type_manager_func vmw_thp_func = { + .init = vmw_thp_init, + .takedown = vmw_thp_takedown, + .get_node = vmw_thp_get_node, + .put_node = vmw_thp_put_node, + .debug = vmw_thp_debug +}; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 3f3b2c7a208a..bf0bc4697959 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -749,7 +749,7 @@ static int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_VRAM: /* "On-card" video ram */ - man->func = &ttm_bo_manager_func; + man->func = &vmw_thp_func; man->gpu_offset = 0; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_CACHED; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index aa7e50f63b94..3c03b1746661 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -34,7 +34,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) .page_mkwrite = vmw_bo_vm_mkwrite, .fault = vmw_bo_vm_fault, .open = ttm_bo_vm_open, - .close = ttm_bo_vm_close + .close = ttm_bo_vm_close, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + .huge_fault = vmw_bo_vm_huge_fault, +#endif }; struct drm_file *file_priv = filp->private_data; struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 4be49c1aef51..374142018171 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -401,7 +401,7 @@ static int xen_drm_drv_dumb_create(struct drm_file *filp, obj = xen_drm_front_gem_create(dev, args->size); if (IS_ERR_OR_NULL(obj)) { - ret = PTR_ERR(obj); + ret = PTR_ERR_OR_ZERO(obj); goto fail; } |
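To close the loop on the vmwgfx changes: vmw_thp_get_node() above tries the largest alignment first -- a PUD-aligned placement when the buffer is large enough, then PMD-aligned, then the ordinary unaligned path -- with each aligned attempt also required to be compatible with the placement's own page_alignment. A sketch of that selection order, assuming 4 KiB base pages; pick_alignment() and the page-count constants are hypothetical:

#include <stdio.h>

#define PMD_PAGES 512UL       /* 2 MiB / 4 KiB, assumed */
#define PUD_PAGES 262144UL    /* 1 GiB / 4 KiB, assumed */

static unsigned long pick_alignment(unsigned long num_pages,
                                    unsigned long min_align)
{
    const unsigned long tries[] = { PUD_PAGES, PMD_PAGES };
    unsigned int i;

    for (i = 0; i < 2; i++) {
        unsigned long a = tries[i];

        /* big enough, and a multiple of the required alignment? */
        if (num_pages >= a && (!min_align || a % min_align == 0))
            return a;
    }
    return min_align;    /* fall back to the caller's alignment */
}

int main(void)
{
    printf("1024-page bo -> align to %lu pages\n", pick_alignment(1024, 1));
    printf("16-page bo   -> align to %lu pages\n", pick_alignment(16, 1));
    return 0;
}

As in the patch, a failed aligned attempt costs little: drm_mm simply reports -ENOSPC and the next, smaller alignment is tried.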