diff options
68 files changed, 783 insertions, 460 deletions
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 3c2cb5d5adfa..0bb0e9c6376c 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -411,3 +411,4 @@ 394 common pkey_mprotect sys_pkey_mprotect 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free +397 common statx sys_statx diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8c7c244247b6..3741859765cf 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1073,6 +1073,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5ab..f31c48d0cd68 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef66..fd691087dc9a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a2..c5c45942fb6e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e9205543..687a358a3733 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif diff --git a/drivers/base/core.c b/drivers/base/core.c index 684bda4d14a1..6bb60fb6a30b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } -void assert_held_device_hotplug(void) -{ - lockdep_assert_held(&device_hotplug_lock); -} - #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 745844ee973e..d4ca9962a759 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -10,7 +10,6 @@ #include <linux/io.h> #include <linux/platform_device.h> #include <linux/atmel_tc.h> -#include <linux/sched_clock.h> /* @@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs) return (upper << 16) | lower; } -static u32 tc_get_cv32(void) -{ - return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); -} - static u64 tc_get_cycles32(struct clocksource *cs) { - return tc_get_cv32(); + return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); } static struct clocksource clksrc = { @@ -75,11 +69,6 @@ static struct clocksource clksrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u64 notrace tc_read_sched_clock(void) -{ - return tc_get_cv32(); -} - #ifdef CONFIG_GENERIC_CLOCKEVENTS struct tc_clkevt_device { @@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void) clksrc.read = tc_get_cycles32; /* setup ony channel 0 */ tcb_setup_single_chan(tc, best_divisor_idx); - - /* register sched_clock on chips with single 32 bit counter */ - sched_clock_register(tc_read_sched_clock, 32, divided_rate); } else { /* tclib will give us three clocks no matter what the * underlying platform supports. diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b..8a08e81ee90d 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d2d0f60ff36d..99424cb8020b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ free_partial_kdata: for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4120b351a8e5..a3a105ec99e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || @@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index f55e45b52fbc..33b504bafb88 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->device == 0x6604) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 50bdb24ef8d6..4a785d6acfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle) /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00) { - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_CP | diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 8cf71f3c6d0e..261b828ad590 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) if (bgate) { cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_PG_STATE_GATE); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 08e6a71f5d05..294b53697334 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc) clk_prepare_enable(hwdev->pxlclk); - /* mclk needs to be set to the same or higher rate than pxlclk */ - clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000); + /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); hwdev->modeset(hwdev, &vm); diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 488aedf5b58d..9f5513006eee 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = { { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE }, { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, - { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 }, + { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE }, }; #define MALIDP_DE_DEFAULT_PREFETCH_START 5 diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 414aada10fe5..d5aec082294c 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -37,6 +37,8 @@ #define LAYER_V_VAL(x) (((x) & 0x1fff) << 16) #define MALIDP_LAYER_COMP_SIZE 0x010 #define MALIDP_LAYER_OFFSET 0x014 +#define MALIDP550_LS_ENABLE 0x01c +#define MALIDP550_LS_R1_IN_SIZE 0x020 /* * This 4-entry look-up-table is used to determine the full 8-bit alpha value @@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, LAYER_V_VAL(plane->state->crtc_y), mp->layer->base + MALIDP_LAYER_OFFSET); + if (mp->layer->id == DE_SMART) + malidp_hw_write(mp->hwdev, + LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h), + mp->layer->base + MALIDP550_LS_R1_IN_SIZE); + /* first clear the rotation bits */ val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL); val &= ~LAYER_ROT_MASK; @@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm) plane->hwdev = malidp->dev; plane->layer = &map->layers[i]; - /* Skip the features which the SMART layer doesn't have */ - if (id == DE_SMART) + if (id == DE_SMART) { + /* + * Enable the first rectangle in the SMART layer to be + * able to use it as a drm plane. + */ + malidp_hw_write(malidp->dev, 1, + plane->layer->base + MALIDP550_LS_ENABLE); + /* Skip the features which the SMART layer doesn't have. */ continue; + } drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h index aff6d4a84e99..b816067a65c5 100644 --- a/drivers/gpu/drm/arm/malidp_regs.h +++ b/drivers/gpu/drm/arm/malidp_regs.h @@ -84,6 +84,7 @@ /* Stride register offsets relative to Lx_BASE */ #define MALIDP_DE_LG_STRIDE 0x18 #define MALIDP_DE_LV_STRIDE0 0x18 +#define MALIDP550_DE_LS_R1_STRIDE 0x28 /* macros to set values into registers */ #define MALIDP_DE_H_FRONTPORCH(x) (((x) & 0xfff) << 0) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a4b42d31391..7febe6eecf72 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..10777da73039 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pwrite) + ret = obj->ops->pwrite(obj, args); + if (ret != -ENODEV) + goto err; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, @@ -2119,6 +2125,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) */ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; + obj->mm.pages = ERR_PTR(-EFAULT); } /* Try to discard unwanted pages */ @@ -2218,7 +2225,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - obj->ops->put_pages(obj, pages); + if (!IS_ERR(pages)) + obj->ops->put_pages(obj, pages); + unlock: mutex_unlock(&obj->mm.lock); } @@ -2437,7 +2446,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { err = ____i915_gem_object_get_pages(obj); if (err) goto unlock; @@ -2515,7 +2524,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; @@ -2563,6 +2572,75 @@ err_unlock: goto out_unlock; } +static int +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *arg) +{ + struct address_space *mapping = obj->base.filp->f_mapping; + char __user *user_data = u64_to_user_ptr(arg->data_ptr); + u64 remain, offset; + unsigned int pg; + + /* Before we instantiate/pin the backing store for our use, we + * can prepopulate the shmemfs filp efficiently using a write into + * the pagecache. We avoid the penalty of instantiating all the + * pages, important if the user is just writing to a few and never + * uses the object on the GPU, and using a direct write into shmemfs + * allows it to avoid the cost of retrieving a page (either swapin + * or clearing-before-use) before it is overwritten. + */ + if (READ_ONCE(obj->mm.pages)) + return -ENODEV; + + /* Before the pages are instantiated the object is treated as being + * in the CPU domain. The pages will be clflushed as required before + * use, and we can freely write into the pages directly. If userspace + * races pwrite with any other operation; corruption will ensue - + * that is userspace's prerogative! + */ + + remain = arg->size; + offset = arg->offset; + pg = offset_in_page(offset); + + do { + unsigned int len, unwritten; + struct page *page; + void *data, *vaddr; + int err; + + len = PAGE_SIZE - pg; + if (len > remain) + len = remain; + + err = pagecache_write_begin(obj->base.filp, mapping, + offset, len, 0, + &page, &data); + if (err < 0) + return err; + + vaddr = kmap(page); + unwritten = copy_from_user(vaddr + pg, user_data, len); + kunmap(page); + + err = pagecache_write_end(obj->base.filp, mapping, + offset, len, len - unwritten, + page, data); + if (err < 0) + return err; + + if (unwritten) + return -EFAULT; + + remain -= len; + user_data += len; + offset += len; + pg = 0; + } while (remain); + + return 0; +} + static bool ban_context(const struct i915_gem_context *ctx) { return (i915_gem_context_is_bannable(ctx) && @@ -3029,6 +3107,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); @@ -3974,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_gtt, .put_pages = i915_gem_object_put_pages_gtt, + + .pwrite = i915_gem_object_pwrite_gtt, }; struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c..3be2503aa042 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * those as well to make room for our guard pages. */ if (check_color) { - if (vma->node.start + vma->node.size == node->start) { - if (vma->node.color == node->color) + if (node->start + node->size == target->start) { + if (node->color == target->color) continue; } - if (vma->node.start == node->start + node->size) { - if (vma->node.color == node->color) + if (node->start == target->start + target->size) { + if (node->color == target->color) continue; } } diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index bf90b07163d1..76b80a0be797 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops { struct sg_table *(*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + int (*pwrite)(struct drm_i915_gem_object *, + const struct drm_i915_gem_pwrite *); + int (*dmabuf_export)(struct drm_i915_gem_object *); void (*release)(struct drm_i915_gem_object *); }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..df20e9bc1c0f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -512,10 +512,36 @@ err_unpin: return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 01341670738f..3282b0f4b134 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ crtc->base.mode = crtc->base.state->mode; - DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n", - old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); - /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) struct intel_crtc_scaler_state *scaler_state = &crtc->config->scaler_state; - DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config); - if (crtc->config->pch_pfit.enabled) { int id; - if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) { - DRM_ERROR("Requesting pfit without getting a scaler first\n"); + if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - } id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos); I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size); - - DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id); } } @@ -14379,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14545,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -14946,17 +14956,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); + if (!modeset && + (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe)) { + intel_color_set_csc(crtc->state); + intel_color_load_luts(crtc->state); + } + /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) goto out; - if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); - } - if (intel_cstate->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) @@ -16599,18 +16611,6 @@ fail: drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16631,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 1b8ba2e77539..2d449fb5d1d2 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = BIT(count) - 1; conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -387,7 +387,7 @@ retry: if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -498,10 +498,8 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..940bab22d464 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; @@ -7916,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7955,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..9481ca9a3ae7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane, int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler; - DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n", - plane_id, PS_PLANE_SEL(plane_id)); - scaler = &crtc_state->scaler_state.scalers[scaler_id]; I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..b7ff592b14f5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma for_each_fw_domain_masked(d, fw_domains, dev_priv) fw_domain_wait_ack(d); + + dev_priv->uncore.fw_domains_active |= fw_domains; } static void @@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma fw_domain_put(d); fw_domain_posting_read(d); } + + dev_priv->uncore.fw_domains_active &= ~fw_domains; } static void @@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; - if (--domain->wake_count == 0) { + if (--domain->wake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask); - dev_priv->uncore.fw_domains_active &= ~domain->mask; - } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, fw_domains &= ~domain->mask; } - if (fw_domains) { + if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; - } } /** @@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; } static inline void __force_wake_auto(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813..ee5883f59be5 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d12b8978142f..72e1588580a1 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->device == 0x6604) && + (rdev->pdev->subsystem_vendor == 0x1028) && + (rdev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index f80bf9385e41..d745e8b50fb8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); mutex_lock(&tilcdc_crtc->enable_lock); @@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY), LCDC_PALETTE_LOAD_MODE_MASK); + + /* There is no real chance for a race here as the time stamp + * is taken before the raster DMA is started. The spin-lock is + * taken to have a memory barrier after taking the time-stamp + * and to avoid a context switch between taking the stamp and + * enabling the raster. + */ + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + tilcdc_crtc->last_vblank = ktime_get(); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); drm_crtc_vblank_on(crtc); @@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown) } drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq); - tilcdc_crtc->last_vblank = 0; tilcdc_crtc->enabled = false; mutex_unlock(&tilcdc_crtc->enable_lock); @@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); @@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, drm_framebuffer_reference(fb); crtc->primary->fb = fb; + tilcdc_crtc->event = event; - spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + mutex_lock(&tilcdc_crtc->enable_lock); - if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) { + if (tilcdc_crtc->enabled) { + unsigned long flags; ktime_t next_vblank; s64 tdiff; - next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, + 1000000 / crtc->hwmode.vrefresh); tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) tilcdc_crtc->next_fb = fb; - } - - if (tilcdc_crtc->next_fb != fb) - set_scanout(crtc, fb); + else + set_scanout(crtc, fb); - tilcdc_crtc->event = event; + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + } - spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + mutex_unlock(&tilcdc_crtc->enable_lock); return 0; } @@ -1036,5 +1047,5 @@ int tilcdc_crtc_create(struct drm_device *dev) fail: tilcdc_crtc_destroy(crtc); - return -ENOMEM; + return ret; } diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918..321ecac23027 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) bm_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (ret == -EAGAIN) { - memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE); s = read_resync_info(mddev, bm_lockres); if (s) { pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", @@ -974,6 +973,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 548d1b8014f8..f6ae1d67bcd0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); -void md_unplug(struct blk_plug_cb *cb, bool from_schedule) -{ - struct mddev *mddev = cb->data; - md_wakeup_thread(mddev->thread); - kfree(cb); -} -EXPORT_SYMBOL(md_unplug); - static inline struct mddev *mddev_get(struct mddev *mddev) { atomic_inc(&mddev->active); @@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); do { md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, @@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; @@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->layout = info->layout; mddev->chunk_sectors = info->chunk_size >> 9; - mddev->max_disks = MD_SB_DISKS; - if (mddev->persistent) { - mddev->flags = 0; - mddev->sb_flags = 0; + mddev->max_disks = MD_SB_DISKS; + mddev->flags = 0; + mddev->sb_flags = 0; } set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); @@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) - revalidate_disk(mddev->gendisk); + if (!rv) { + if (mddev->queue) { + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } + } return rv; } diff --git a/drivers/md/md.h b/drivers/md/md.h index b8859cbf84b6..dde8ecb760c8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev); -extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); extern void md_kick_rdev_from_array(struct md_rdev * rdev); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); -static inline int mddev_check_plugged(struct mddev *mddev) -{ - return !!blk_check_plugged(md_unplug, mddev, - sizeof(struct blk_plug_cb)); -} static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fbc2d7851b49..a34f58772022 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf) static void freeze_array(struct r1conf *conf, int extra) { /* Stop sync I/O and normal I/O and wait for everything to - * go quite. + * go quiet. * This is called in two situations: * 1) management command handlers (reshape, remove disk, quiesce). * 2) one normal I/O request failed. @@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - if (bio_data_dir(split) == READ) + if (bio_data_dir(split) == READ) { raid1_read_request(mddev, split); - else + + /* + * If a bio is splitted, the first part of bio will + * pass barrier but the bio is queued in + * current->bio_list (see generic_make_request). If + * there is a raise_barrier() called here, the second + * part of bio can't pass barrier. But since the first + * part bio isn't dispatched to underlaying disks yet, + * the barrier is never released, hence raise_barrier + * will alays wait. We have a deadlock. + * Note, this only happens in read path. For write + * path, the first part of bio is dispatched in a + * schedule() call (because of blk plug) or offloaded + * to raid10d. + * Quitting from the function immediately can change + * the bio order queued in bio_list and avoid the deadlock. + */ + if (split != bio) { + generic_make_request(bio); + break; + } + } else raid1_write_request(mddev, split); } while (split != bio); } @@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0536658c9d40..e89a8d78a9ed 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1478,11 +1478,24 @@ retry_write: mbio->bi_bdev = (void*)rdev; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } @@ -1572,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + /* + * If a bio is splitted, the first part of bio will pass + * barrier but the bio is queued in current->bio_list (see + * generic_make_request). If there is a raise_barrier() called + * here, the second part of bio can't pass barrier. But since + * the first part bio isn't dispatched to underlaying disks + * yet, the barrier is never released, hence raise_barrier will + * alays wait. We have a deadlock. + * Note, this only happens in read path. For write path, the + * first part of bio is dispatched in a schedule() call + * (because of blk plug) or offloaded to raid10d. + * Quitting from the function immediately can change the bio + * order queued in bio_list and avoid the deadlock. + */ __make_request(mddev, split); + if (split != bio && bio_data_dir(bio) == READ) { + generic_make_request(bio); + break; + } } while (split != bio); /* In case raid10d snuck in to freeze_array */ @@ -3944,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, size); - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } if (sectors > mddev->dev_sectors && mddev->recovery_cp > oldsize) { mddev->recovery_cp = oldsize; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4fb09b3fcb41..ed5cd705b985 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs, (test_bit(R5_Wantdrain, &dev->flags) || test_bit(R5_InJournal, &dev->flags))) || (srctype == SYNDROME_SRC_WRITTEN && - dev->written)) { + (dev->written || + test_bit(R5_InJournal, &dev->flags)))) { if (test_bit(R5_InJournal, &dev->flags)) srcs[slot] = sh->dev[i].orig_page; else @@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index c77a0751a311..f3bf8f4e2d6c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -36,6 +36,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/refcount.h> #include <xen/xen.h> #include <xen/grant_table.h> @@ -86,7 +87,7 @@ struct grant_map { int index; int count; int flags; - atomic_t users; + refcount_t users; struct unmap_notify notify; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; @@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->index = 0; add->count = count; - atomic_set(&add->users, 1); + refcount_set(&add->users, 1); return add; @@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) if (!map) return; - if (!atomic_dec_and_test(&map->users)) + if (!refcount_dec_and_test(&map->users)) return; atomic_sub(map->count, &pages_mapped); @@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma) struct grant_map *map = vma->vm_private_data; pr_debug("gntdev_vma_open %p\n", vma); - atomic_inc(&map->users); + refcount_inc(&map->users); } static void gntdev_vma_close(struct vm_area_struct *vma) @@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } - atomic_inc(&map->users); + refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; diff --git a/fs/afs/callback.c b/fs/afs/callback.c index b29447e03ede..25d404d22cae 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 2edbdcbf6432..3062cceb5c2a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/file.c b/fs/afs/file.c index ba7b71fba34b..0d5b8508869b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page) if (!req) goto enomem; + /* We request a full page. If the page is a partial one at the + * end of the file, the server will return a short read and the + * unmarshalling code will clear the unfilled space. + */ atomic_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; - req->len = min_t(size_t, i_size_read(inode) - req->pos, - PAGE_SIZE); + req->len = PAGE_SIZE; req->nr_pages = 1; req->pages[0] = page; get_page(page); @@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page) fscache_uncache_page(vnode->cache, page); #endif BUG_ON(PageFsCache(page)); - goto error; + + if (ret == -EINTR || + ret == -ENOMEM || + ret == -ERESTARTSYS || + ret == -EAGAIN) + goto error; + goto io_error; } SetPageUptodate(page); @@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page) _leave(" = 0"); return 0; +io_error: + SetPageError(page); + goto error; enomem: ret = -ENOMEM; error: - SetPageError(page); unlock_page(page); _leave(" = %d", ret); return ret; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ac8e766978dc..19f76ae36982 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -17,6 +17,12 @@ #include "afs_fs.h" /* + * We need somewhere to discard into in case the server helpfully returns more + * than we asked for in FS.FetchData{,64}. + */ +static u8 afs_discard_buffer[64]; + +/* * decode an AFSFid block */ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) @@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } @@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) void *buffer; int ret; - _enter("{%u,%zu/%u;%u/%llu}", + _enter("{%u,%zu/%u;%llu/%llu}", call->unmarshall, call->offset, call->count, req->remain, req->actual_len); @@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->actual_len |= ntohl(call->tmp); _debug("DATA length: %llu", req->actual_len); - /* Check that the server didn't want to send us extra. We - * might want to just discard instead, but that requires - * cooperation from AF_RXRPC. - */ - if (req->actual_len > req->len) - return -EBADMSG; req->remain = req->actual_len; call->offset = req->pos & (PAGE_SIZE - 1); @@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->unmarshall++; begin_page: + ASSERTCMP(req->index, <, req->nr_pages); if (req->remain > PAGE_SIZE - call->offset) size = PAGE_SIZE - call->offset; else @@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* extract the returned data */ case 3: - _debug("extract data %u/%llu %zu/%u", + _debug("extract data %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); buffer = kmap(req->pages[req->index]); @@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (call->offset == PAGE_SIZE) { if (req->page_done) req->page_done(call, req); + req->index++; if (req->remain > 0) { - req->index++; call->offset = 0; + if (req->index >= req->nr_pages) { + call->unmarshall = 4; + goto begin_discard; + } goto begin_page; } } + goto no_more_data; + + /* Discard any excess data the server gave us */ + begin_discard: + case 4: + size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); + call->count = size; + _debug("extract discard %llu/%llu %zu/%u", + req->remain, req->actual_len, call->offset, call->count); + + call->offset = 0; + ret = afs_extract_data(call, afs_discard_buffer, call->count, true); + req->remain -= call->offset; + if (ret < 0) + return ret; + if (req->remain > 0) + goto begin_discard; no_more_data: call->offset = 0; - call->unmarshall++; + call->unmarshall = 5; /* extract the metadata */ - case 4: + case 5: ret = afs_extract_data(call, call->buffer, (21 + 3 + 6) * 4, false); if (ret < 0) @@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->offset = 0; call->unmarshall++; - case 5: + case 6: break; } - if (call->count < PAGE_SIZE) { - buffer = kmap(req->pages[req->index]); - memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(req->pages[req->index]); + for (; req->index < req->nr_pages; req->index++) { + if (call->count < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + call->count, PAGE_SIZE); if (req->page_done) req->page_done(call, req); + call->count = 0; } _leave(" = 0 [done]"); @@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; @@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e4897a048d2..aae55dd15108 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_fop = &afs_dir_file_operations; break; case AFS_FTYPE_SYMLINK: - inode->i_mode = S_IFLNK | vnode->status.mode; - inode->i_op = &page_symlink_inode_operations; + /* Symlinks with a mode of 0644 are actually mountpoints. */ + if ((vnode->status.mode & 0777) == 0644) { + inode->i_flags |= S_AUTOMOUNT; + + spin_lock(&vnode->lock); + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + spin_unlock(&vnode->lock); + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_mntpt_inode_operations; + inode->i_fop = &afs_mntpt_file_operations; + } else { + inode->i_mode = S_IFLNK | vnode->status.mode; + inode->i_op = &page_symlink_inode_operations; + } inode_nohighmem(inode); break; default: @@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; - - /* check to see whether a symbolic link is really a mountpoint */ - if (vnode->status.type == AFS_FTYPE_SYMLINK) { - afs_mntpt_check_symlink(vnode, key); - - if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) { - inode->i_mode = S_IFDIR | vnode->status.mode; - inode->i_op = &afs_mntpt_inode_operations; - inode->i_fop = &afs_mntpt_file_operations; - } - } - return 0; } @@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { @@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) call_rcu(&permits->rcu, afs_zap_permits); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5dfa56903a2d..a6901360fb81 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/ktime.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/rxrpc.h> @@ -90,7 +91,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ @@ -127,12 +131,11 @@ struct afs_call_type { */ struct afs_read { loff_t pos; /* Where to start reading */ - loff_t len; /* How much to read */ + loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ + loff_t remain; /* Amount remaining */ atomic_t usage; - unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ - unsigned int pg_offset; /* Offset in page we're at */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); struct page *pages[]; @@ -247,7 +250,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -258,7 +261,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -271,7 +274,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ @@ -374,8 +377,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ @@ -557,7 +560,6 @@ extern const struct inode_operations afs_autocell_inode_operations; extern const struct file_operations afs_mntpt_file_operations; extern struct vfsmount *afs_d_automount(struct path *); -extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); /* @@ -718,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3..100b207efc9e 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index d4fb0afc0097..bd3b65cde282 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -47,59 +47,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; /* - * check a symbolic link to see whether it actually encodes a mountpoint - * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately - */ -int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) -{ - struct page *page; - size_t size; - char *buf; - int ret; - - _enter("{%x:%u,%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - - /* read the contents of the symlink into the pagecache */ - page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, - afs_page_filler, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - - ret = -EIO; - if (PageError(page)) - goto out_free; - - buf = kmap(page); - - /* examine the symlink's contents */ - size = vnode->status.size; - _debug("symlink to %*.*s", (int) size, (int) size, buf); - - if (size > 2 && - (buf[0] == '%' || buf[0] == '#') && - buf[size - 1] == '.' - ) { - _debug("symlink is a mountpoint"); - spin_lock(&vnode->lock); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - vnode->vfs_inode.i_flags |= S_AUTOMOUNT; - spin_unlock(&vnode->lock); - } - - ret = 0; - - kunmap(page); -out_free: - put_page(page); -out: - _leave(" = %d", ret); - return ret; -} - -/* * no valid lookup procedure on this sort of dir */ static struct dentry *afs_mntpt_lookup(struct inode *dir, diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 419ef05dcb5e..8f76b13d5549 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } +#define AFS_BVEC_MAX 8 + +/* + * Load the given bvec with the next few pages. + */ +static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, + struct bio_vec *bv, pgoff_t first, pgoff_t last, + unsigned offset) +{ + struct page *pages[AFS_BVEC_MAX]; + unsigned int nr, n, i, to, bytes = 0; + + nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); + n = find_get_pages_contig(call->mapping, first, nr, pages); + ASSERTCMP(n, ==, nr); + + msg->msg_flags |= MSG_MORE; + for (i = 0; i < nr; i++) { + to = PAGE_SIZE; + if (first + i >= last) { + to = call->last_to; + msg->msg_flags &= ~MSG_MORE; + } + bv[i].bv_page = pages[i]; + bv[i].bv_len = to - offset; + bv[i].bv_offset = offset; + bytes += to - offset; + offset = 0; + } + + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); +} + /* * attach the data from a bunch of pages on an inode to a call */ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { - struct page *pages[8]; - unsigned count, n, loop, offset, to; + struct bio_vec bv[AFS_BVEC_MAX]; + unsigned int bytes, nr, loop, offset; pgoff_t first = call->first, last = call->last; int ret; - _enter(""); - offset = call->first_offset; call->first_offset = 0; do { - _debug("attach %lx-%lx", first, last); - - count = last - first + 1; - if (count > ARRAY_SIZE(pages)) - count = ARRAY_SIZE(pages); - n = find_get_pages_contig(call->mapping, first, count, pages); - ASSERTCMP(n, ==, count); - - loop = 0; - do { - struct bio_vec bvec = {.bv_page = pages[loop], - .bv_offset = offset}; - msg->msg_flags = 0; - to = PAGE_SIZE; - if (first + loop >= last) - to = call->last_to; - else - msg->msg_flags = MSG_MORE; - bvec.bv_len = to - offset; - offset = 0; - - _debug("- range %u-%u%s", - offset, to, msg->msg_flags ? " [more]" : ""); - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, - &bvec, 1, to - offset); - - /* have to change the state *before* sending the last - * packet as RxRPC might give us the reply before it - * returns from sending the request */ - if (first + loop >= last) - call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, - msg, to - offset); - if (ret < 0) - break; - } while (++loop < count); - first += count; - - for (loop = 0; loop < count; loop++) - put_page(pages[loop]); + afs_load_bvec(call, msg, bv, first, last, offset); + offset = 0; + bytes = msg->msg_iter.count; + nr = msg->msg_iter.nr_segs; + + /* Have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request. + */ + if (first + nr - 1 >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, bytes); + for (loop = 0; loop < nr; loop++) + put_page(bv[loop].bv_page); if (ret < 0) break; + + first += nr; } while (first <= last); - _leave(" = %d", ret); return ret; } @@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. + */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return afs_wait_for_call_to_complete(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_put_call(call); _leave(" = %d", ret); @@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -482,8 +505,9 @@ out: _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -493,7 +517,6 @@ do_abort: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_put_call(call); _leave(" = %d", ret); diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc89..ecb86a670180 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd55..c001b1f2455f 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index d7d8dd8c0b31..37b7c3b342a6 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... */ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); diff --git a/fs/afs/write.c b/fs/afs/write.c index c83c1a0e851f..2d2fccd5044b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, struct page *page) + loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; - loff_t i_size; int ret; _enter(",,%llu", (unsigned long long)pos); @@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, atomic_set(&req->usage, 1); req->pos = pos; + req->len = len; req->nr_pages = 1; req->pages[0] = page; - - i_size = i_size_read(&vnode->vfs_inode); - if (pos + PAGE_SIZE > i_size) - req->len = i_size - pos; - else - req->len = PAGE_SIZE; + get_page(page); ret = afs_vnode_fetch_data(vnode, key, req); afs_put_read(req); @@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { - ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); + ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); @@ -233,7 +231,7 @@ flush_conflicting_wb: if (wb->state == AFS_WBACK_PENDING) wb->state = AFS_WBACK_CONFLICTING; spin_unlock(&vnode->writeback_lock); - if (PageDirty(page)) { + if (clear_page_dirty_for_io(page)) { ret = afs_write_back_from_locked_page(wb, page); if (ret < 0) { afs_put_writeback(candidate); @@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct key *key = file->private_data; loff_t i_size, maybe_i_size; + int ret; _enter("{%x:%u},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); @@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping, spin_unlock(&vnode->writeback_lock); } + if (!PageUptodate(page)) { + if (copied < len) { + /* Try and load any missing data from the server. The + * unmarshalling routine will take care of clearing any + * bits that are beyond the EOF. + */ + ret = afs_fill_page(vnode, key, pos + copied, + len - copied, page); + if (ret < 0) + return ret; + } + SetPageUptodate(page); + } + set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); @@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); @@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, _enter(",%lx", primary_page->index); count = 1; - if (!clear_page_dirty_for_io(primary_page)) - BUG(); if (test_set_page_writeback(primary_page)) BUG(); @@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping, */ lock_page(page); - if (page->mapping != mapping) { + if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); put_page(page); continue; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || !PageDirty(page)) { + if (PageWriteback(page)) { unlock_page(page); + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + put_page(page); continue; } @@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping, wb->state = AFS_WBACK_WRITING; spin_unlock(&wb->vnode->writeback_lock); + if (!clear_page_dirty_for_io(page)) + BUG(); ret = afs_write_back_from_locked_page(wb, page); unlock_page(page); put_page(page); @@ -746,6 +764,20 @@ out: } /* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. + */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + +/* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index d04547fcf274..eb00bc133bca 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, + int size); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197..96b45cd6c63f 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,93 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int size) +{ + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + xfs_ino_t ino; + int i; + int i8count; + int offset; + __uint8_t filetype; + + dops = xfs_dir_get_ops(mp, NULL); + + /* + * Give up if the directory is way too short. + */ + XFS_WANT_CORRUPTED_RETURN(mp, size > + offsetof(struct xfs_dir2_sf_hdr, parent)); + XFS_WANT_CORRUPTED_RETURN(mp, size >= + xfs_dir2_sf_hdr_size(sfp->i8count)); + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + XFS_WANT_CORRUPTED_RETURN(mp, + ((char *)sfep + sizeof(*sfep)) < endp); + + /* Don't allow names with known bad length. */ + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + + /* Check that the offsets always increase. */ + XFS_WANT_CORRUPTED_RETURN(mp, + xfs_dir2_sf_get_offset(sfep) >= offset); + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); + XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + + /* Make sure this whole thing ought to be in local format. */ + XFS_WANT_CORRUPTED_RETURN(mp, offset + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 25c1e078aef6..9653e964eda4 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -33,6 +33,8 @@ #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -320,6 +322,7 @@ xfs_iformat_local( int whichfork, int size) { + int error; /* * If the size is unreasonable, then something @@ -336,6 +339,14 @@ xfs_iformat_local( return -EFSCORRUPTED; } + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(ip->i_mount, + (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), + size); + if (error) + return error; + } + xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -856,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -void +int xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -866,6 +877,7 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; + int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -874,7 +886,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return; + return 0; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -882,12 +894,19 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return; + return 0; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(mp, + (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, + ifp->if_bytes); + if (error) + return error; + } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -940,6 +959,7 @@ xfs_iflush_fork( ASSERT(0); break; } + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7fb8365326d1..132dc59fdde6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 003a99b83bd8..ad9396e516f6 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -71,22 +71,11 @@ xfs_dir2_sf_getdents( struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Give up if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; - /* * If the block number in the offset is out of range, we're done. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7eaf1ef74e3c..c7fe2c2123ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3475,6 +3475,7 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3557,9 +3558,14 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (error) + return error; + if (XFS_IFORK_Q(ip)) { + error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + if (error) + return error; + } xfs_inobp_check(mp, bp); /* diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928d..9ef518af5515 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c15..5734480c9590 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 407cb55df6ac..7fb97863c945 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -33,8 +33,8 @@ extern "C" { #define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ struct drm_omap_param { - uint64_t param; /* in */ - uint64_t value; /* in (set_param), out (get_param) */ + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ }; #define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ @@ -53,18 +53,18 @@ struct drm_omap_param { #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { - uint32_t bytes; /* (for non-tiled formats) */ + __u32 bytes; /* (for non-tiled formats) */ struct { - uint16_t width; - uint16_t height; + __u16 width; + __u16 height; } tiled; /* (for tiled formats) */ }; struct drm_omap_gem_new { union omap_gem_size size; /* in */ - uint32_t flags; /* in */ - uint32_t handle; /* out */ - uint32_t __pad; + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; }; /* mask of operations: */ @@ -74,33 +74,33 @@ enum omap_gem_op { }; struct drm_omap_gem_cpu_prep { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ }; struct drm_omap_gem_cpu_fini { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ /* TODO maybe here we pass down info about what regions are touched * by sw so we can be clever about cache ops? For now a placeholder, * set to zero and we just do full buffer flush.. */ - uint32_t nregions; - uint32_t __pad; + __u32 nregions; + __u32 __pad; }; struct drm_omap_gem_info { - uint32_t handle; /* buffer handle (in) */ - uint32_t pad; - uint64_t offset; /* mmap offset (out) */ + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ /* note: in case of tiled buffers, the user virtual size can be * different from the physical size (ie. how many pages are needed * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. * This size here is the one that should be used if you want to * mmap() the buffer: */ - uint32_t size; /* virtual size for mmap'ing (out) */ - uint32_t __pad; + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; }; #define DRM_OMAP_GET_PARAM 0x00 diff --git a/kernel/futex.c b/kernel/futex.c index 229a744b1781..45858ec73941 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2899,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -2907,6 +2908,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2930,18 +2933,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 7bc24d477805..c65f7989f850 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) */ if (sem->count == 0) break; - if (signal_pending_state(state, current)) { - ret = -EINTR; - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); @@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) } /* got the lock */ sem->count = -1; -out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; + +out_nolock: + list_del(&waiter.list); + if (!list_empty(&sem->wait_list)) + __rwsem_do_wake(sem, 1); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return -EINTR; } void __sched __down_write(struct rw_semaphore *sem) diff --git a/kernel/memremap.c b/kernel/memremap.c index 06123234f118..07e85e5229da 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); - lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); - unlock_device_hotplug(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); @@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; - lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); - unlock_device_hotplug(); if (error) goto err_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 295479b792ec..6fa7208bcd56 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -125,9 +125,12 @@ void put_online_mems(void) } +/* Serializes write accesses to mem_hotplug.active_writer. */ +static DEFINE_MUTEX(memory_add_remove_lock); + void mem_hotplug_begin(void) { - assert_held_device_hotplug(); + mutex_lock(&memory_add_remove_lock); mem_hotplug.active_writer = current; @@ -147,6 +150,7 @@ void mem_hotplug_done(void) mem_hotplug.active_writer = NULL; mutex_unlock(&mem_hotplug.lock); memhp_lock_release(); + mutex_unlock(&memory_add_remove_lock); } /* add this memory to iomem resource */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0dd80222b20b..0b057628a7ba 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (fatal_signal_pending(current)) { area->nr_pages = i; - goto fail; + goto fail_no_warn; } if (node == NUMA_NO_NODE) @@ -1709,6 +1709,7 @@ fail: warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); +fail_no_warn: vfree(area->addr); return NULL; } diff --git a/mm/z3fold.c b/mm/z3fold.c index 8970a2fd3b1a..f9492bccfd79 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -667,6 +667,7 @@ next: z3fold_page_unlock(zhdr); spin_lock(&pool->lock); if (kref_put(&zhdr->refcount, release_z3fold_page)) { + spin_unlock(&pool->lock); atomic64_dec(&pool->pages_nr); return 0; } |