diff options
Diffstat (limited to 'include')
45 files changed, 986 insertions, 230 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 5d8ffa3e6f8c..c1cde3577551 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -7,10 +7,10 @@ static __always_inline int preempt_count(void) { - return current_thread_info()->preempt_count; + return READ_ONCE(current_thread_info()->preempt_count); } -static __always_inline int *preempt_count_ptr(void) +static __always_inline volatile int *preempt_count_ptr(void) { return ¤t_thread_info()->preempt_count; } diff --git a/include/drm/drm_dp_dual_mode_helper.h b/include/drm/drm_dp_dual_mode_helper.h new file mode 100644 index 000000000000..e8a9dfd0e055 --- /dev/null +++ b/include/drm/drm_dp_dual_mode_helper.h @@ -0,0 +1,92 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DRM_DP_DUAL_MODE_HELPER_H +#define DRM_DP_DUAL_MODE_HELPER_H + +#include <linux/types.h> + +/* + * Optional for type 1 DVI adaptors + * Mandatory for type 1 HDMI and type 2 adaptors + */ +#define DP_DUAL_MODE_HDMI_ID 0x00 /* 00-0f */ +#define DP_DUAL_MODE_HDMI_ID_LEN 16 +/* + * Optional for type 1 adaptors + * Mandatory for type 2 adaptors + */ +#define DP_DUAL_MODE_ADAPTOR_ID 0x10 +#define DP_DUAL_MODE_REV_MASK 0x07 +#define DP_DUAL_MODE_REV_TYPE2 0x00 +#define DP_DUAL_MODE_TYPE_MASK 0xf0 +#define DP_DUAL_MODE_TYPE_TYPE2 0xa0 +#define DP_DUAL_MODE_IEEE_OUI 0x11 /* 11-13*/ +#define DP_DUAL_IEEE_OUI_LEN 3 +#define DP_DUAL_DEVICE_ID 0x14 /* 14-19 */ +#define DP_DUAL_DEVICE_ID_LEN 6 +#define DP_DUAL_MODE_HARDWARE_REV 0x1a +#define DP_DUAL_MODE_FIRMWARE_MAJOR_REV 0x1b +#define DP_DUAL_MODE_FIRMWARE_MINOR_REV 0x1c +#define DP_DUAL_MODE_MAX_TMDS_CLOCK 0x1d +#define DP_DUAL_MODE_I2C_SPEED_CAP 0x1e +#define DP_DUAL_MODE_TMDS_OEN 0x20 +#define DP_DUAL_MODE_TMDS_DISABLE 0x01 +#define DP_DUAL_MODE_HDMI_PIN_CTRL 0x21 +#define DP_DUAL_MODE_CEC_ENABLE 0x01 +#define DP_DUAL_MODE_I2C_SPEED_CTRL 0x22 + +struct i2c_adapter; + +ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, + u8 offset, void *buffer, size_t size); +ssize_t drm_dp_dual_mode_write(struct i2c_adapter *adapter, + u8 offset, const void *buffer, size_t size); + +/** + * enum drm_dp_dual_mode_type - Type of the DP dual mode adaptor + * @DRM_DP_DUAL_MODE_NONE: No DP dual mode adaptor + * @DRM_DP_DUAL_MODE_UNKNOWN: Could be either none or type 1 DVI adaptor + * @DRM_DP_DUAL_MODE_TYPE1_DVI: Type 1 DVI adaptor + * @DRM_DP_DUAL_MODE_TYPE1_HDMI: Type 1 HDMI adaptor + * @DRM_DP_DUAL_MODE_TYPE2_DVI: Type 2 DVI adaptor + * @DRM_DP_DUAL_MODE_TYPE2_HDMI: Type 2 HDMI adaptor + */ +enum drm_dp_dual_mode_type { + DRM_DP_DUAL_MODE_NONE, + DRM_DP_DUAL_MODE_UNKNOWN, + DRM_DP_DUAL_MODE_TYPE1_DVI, + DRM_DP_DUAL_MODE_TYPE1_HDMI, + DRM_DP_DUAL_MODE_TYPE2_DVI, + DRM_DP_DUAL_MODE_TYPE2_HDMI, +}; + +enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(struct i2c_adapter *adapter); +int drm_dp_dual_mode_max_tmds_clock(enum drm_dp_dual_mode_type type, + struct i2c_adapter *adapter); +int drm_dp_dual_mode_get_tmds_output(enum drm_dp_dual_mode_type type, + struct i2c_adapter *adapter, bool *enabled); +int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type, + struct i2c_adapter *adapter, bool enable); +const char *drm_dp_get_dual_mode_type_name(enum drm_dp_dual_mode_type type); + +#endif diff --git a/include/dt-bindings/thermal/tegra124-soctherm.h b/include/dt-bindings/thermal/tegra124-soctherm.h index 85aaf66690f9..729ab9fc325e 100644 --- a/include/dt-bindings/thermal/tegra124-soctherm.h +++ b/include/dt-bindings/thermal/tegra124-soctherm.h @@ -9,5 +9,6 @@ #define TEGRA124_SOCTHERM_SENSOR_MEM 1 #define TEGRA124_SOCTHERM_SENSOR_GPU 2 #define TEGRA124_SOCTHERM_SENSOR_PLLX 3 +#define TEGRA124_SOCTHERM_SENSOR_NUM 4 #endif diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index b651aed9dc6b..dda39d8fa189 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -24,9 +24,6 @@ #include <linux/workqueue.h> struct arch_timer_kvm { - /* Is the timer enabled */ - bool enabled; - /* Virtual offset */ cycle_t cntvoff; }; @@ -53,15 +50,15 @@ struct arch_timer_cpu { /* Timer IRQ */ struct kvm_irq_level irq; - /* VGIC mapping */ - struct irq_phys_map *map; - /* Active IRQ state caching */ bool active_cleared_last; + + /* Is the timer enabled */ + bool enabled; }; int kvm_timer_hyp_init(void); -void kvm_timer_enable(struct kvm *kvm); +int kvm_timer_enable(struct kvm_vcpu *vcpu); void kvm_timer_init(struct kvm *kvm); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index be6037aa703d..da0a524802cb 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -19,6 +19,10 @@ #ifndef __ASM_ARM_KVM_VGIC_H #define __ASM_ARM_KVM_VGIC_H +#ifdef CONFIG_KVM_NEW_VGIC +#include <kvm/vgic/vgic.h> +#else + #include <linux/kernel.h> #include <linux/kvm.h> #include <linux/irqreturn.h> @@ -158,7 +162,6 @@ struct vgic_io_device { struct irq_phys_map { u32 virt_irq; u32 phys_irq; - u32 irq; }; struct irq_phys_map_entry { @@ -305,9 +308,6 @@ struct vgic_cpu { unsigned long *active_shared; unsigned long *pend_act_shared; - /* Number of list registers on this CPU */ - int nr_lr; - /* CPU vif control registers for world switch */ union { struct vgic_v2_cpu_if vgic_v2; @@ -342,17 +342,18 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, - struct irq_phys_map *map, bool level); + unsigned int virt_irq, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, - int virt_irq, int irq); -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map); +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) #define vgic_ready(k) ((k)->arch.vgic.ready) +#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ + ((i) < (k)->arch.vgic.nr_irqs)) int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, const struct vgic_ops **ops, @@ -370,4 +371,5 @@ static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, } #endif +#endif /* old VGIC include */ #endif diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h new file mode 100644 index 000000000000..3fbd175265ae --- /dev/null +++ b/include/kvm/vgic/vgic.h @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARM_KVM_VGIC_VGIC_H +#define __ASM_ARM_KVM_VGIC_VGIC_H + +#include <linux/kernel.h> +#include <linux/kvm.h> +#include <linux/irqreturn.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <kvm/iodev.h> + +#define VGIC_V3_MAX_CPUS 255 +#define VGIC_V2_MAX_CPUS 8 +#define VGIC_NR_IRQS_LEGACY 256 +#define VGIC_NR_SGIS 16 +#define VGIC_NR_PPIS 16 +#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) +#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) +#define VGIC_MAX_SPI 1019 +#define VGIC_MAX_RESERVED 1023 +#define VGIC_MIN_LPI 8192 + +enum vgic_type { + VGIC_V2, /* Good ol' GICv2 */ + VGIC_V3, /* New fancy GICv3 */ +}; + +/* same for all guests, as depending only on the _host's_ GIC model */ +struct vgic_global { + /* type of the host GIC */ + enum vgic_type type; + + /* Physical address of vgic virtual cpu interface */ + phys_addr_t vcpu_base; + + /* virtual control interface mapping */ + void __iomem *vctrl_base; + + /* Number of implemented list registers */ + int nr_lr; + + /* Maintenance IRQ number */ + unsigned int maint_irq; + + /* maximum number of VCPUs allowed (GICv2 limits us to 8) */ + int max_gic_vcpus; + + /* Only needed for the legacy KVM_CREATE_IRQCHIP */ + bool can_emulate_gicv2; +}; + +extern struct vgic_global kvm_vgic_global_state; + +#define VGIC_V2_MAX_LRS (1 << 6) +#define VGIC_V3_MAX_LRS 16 +#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + +enum vgic_irq_config { + VGIC_CONFIG_EDGE = 0, + VGIC_CONFIG_LEVEL +}; + +struct vgic_irq { + spinlock_t irq_lock; /* Protects the content of the struct */ + struct list_head ap_list; + + struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU + * SPIs and LPIs: The VCPU whose ap_list + * this is queued on. + */ + + struct kvm_vcpu *target_vcpu; /* The VCPU that this interrupt should + * be sent to, as a result of the + * targets reg (v2) or the + * affinity reg (v3). + */ + + u32 intid; /* Guest visible INTID */ + bool pending; + bool line_level; /* Level only */ + bool soft_pending; /* Level only */ + bool active; /* not used for LPIs */ + bool enabled; + bool hw; /* Tied to HW IRQ */ + u32 hwintid; /* HW INTID number */ + union { + u8 targets; /* GICv2 target VCPUs mask */ + u32 mpidr; /* GICv3 target VCPU */ + }; + u8 source; /* GICv2 SGIs only */ + u8 priority; + enum vgic_irq_config config; /* Level or edge */ +}; + +struct vgic_register_region; + +struct vgic_io_device { + gpa_t base_addr; + struct kvm_vcpu *redist_vcpu; + const struct vgic_register_region *regions; + int nr_regions; + struct kvm_io_device dev; +}; + +struct vgic_dist { + bool in_kernel; + bool ready; + bool initialized; + + /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ + u32 vgic_model; + + int nr_spis; + + /* TODO: Consider moving to global state */ + /* Virtual control interface mapping */ + void __iomem *vctrl_base; + + /* base addresses in guest physical address space: */ + gpa_t vgic_dist_base; /* distributor */ + union { + /* either a GICv2 CPU interface */ + gpa_t vgic_cpu_base; + /* or a number of GICv3 redistributor regions */ + gpa_t vgic_redist_base; + }; + + /* distributor enabled */ + bool enabled; + + struct vgic_irq *spis; + + struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; +}; + +struct vgic_v2_cpu_if { + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u64 vgic_eisr; /* Saved only */ + u64 vgic_elrsr; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_V2_MAX_LRS]; +}; + +struct vgic_v3_cpu_if { +#ifdef CONFIG_KVM_ARM_VGIC_V3 + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_sre; /* Restored only, change ignored */ + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr; /* Saved only */ + u32 vgic_elrsr; /* Saved only */ + u32 vgic_ap0r[4]; + u32 vgic_ap1r[4]; + u64 vgic_lr[VGIC_V3_MAX_LRS]; +#endif +}; + +struct vgic_cpu { + /* CPU vif control registers for world switch */ + union { + struct vgic_v2_cpu_if vgic_v2; + struct vgic_v3_cpu_if vgic_v3; + }; + + unsigned int used_lrs; + struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; + + spinlock_t ap_list_lock; /* Protects the ap_list */ + + /* + * List of IRQs that this VCPU should consider because they are either + * Active or Pending (hence the name; AP list), or because they recently + * were one of the two and need to be migrated off this list to another + * VCPU. + */ + struct list_head ap_list_head; + + u64 live_lrs; +}; + +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); +void kvm_vgic_early_init(struct kvm *kvm); +int kvm_vgic_create(struct kvm *kvm, u32 type); +void kvm_vgic_destroy(struct kvm *kvm); +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); +int kvm_vgic_map_resources(struct kvm *kvm); +int kvm_vgic_hyp_init(void); + +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level); +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level); +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); + +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) +#define vgic_initialized(k) ((k)->arch.vgic.initialized) +#define vgic_ready(k) ((k)->arch.vgic.ready) +#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ + ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) + +bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); + +#ifdef CONFIG_KVM_ARM_VGIC_V3 +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); +#else +static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) +{ +} +#endif + +/** + * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW + * + * The host's GIC naturally limits the maximum amount of VCPUs a guest + * can use. + */ +static inline int kvm_vgic_get_max_vcpus(void) +{ + return kvm_vgic_global_state.max_gic_vcpus; +} + +#endif /* __ASM_ARM_KVM_VGIC_VGIC_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1fd8fdff2f81..3d9cf326574f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -768,6 +768,17 @@ static inline void rq_flush_dcache_pages(struct request *rq) } #endif +#ifdef CONFIG_PRINTK +#define vfs_msg(sb, level, fmt, ...) \ + __vfs_msg(sb, level, fmt, ##__VA_ARGS__) +#else +#define vfs_msg(sb, level, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __vfs_msg(sb, "", " "); \ +} while (0) +#endif + extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); @@ -1660,7 +1671,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - pfn_t *); + pfn_t *, long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1680,6 +1691,8 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); +extern int bdev_dax_supported(struct super_block *, int); +extern bool bdev_dax_capable(struct block_device *); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h index b827e066e55a..146507df8650 100644 --- a/include/linux/ceph/ceph_frag.h +++ b/include/linux/ceph/ceph_frag.h @@ -51,11 +51,11 @@ static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) return ceph_frag_make(newbits, ceph_frag_value(f) | (i << (24 - newbits))); } -static inline int ceph_frag_is_leftmost(__u32 f) +static inline bool ceph_frag_is_leftmost(__u32 f) { return ceph_frag_value(f) == 0; } -static inline int ceph_frag_is_rightmost(__u32 f) +static inline bool ceph_frag_is_rightmost(__u32 f) { return ceph_frag_value(f) == ceph_frag_mask(f); } diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 37f28bf55ce4..dfce616002ad 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -153,8 +153,9 @@ struct ceph_dir_layout { /* watch-notify operations */ enum { - WATCH_NOTIFY = 1, /* notifying watcher */ - WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */ + CEPH_WATCH_EVENT_NOTIFY = 1, /* notifying watcher */ + CEPH_WATCH_EVENT_NOTIFY_COMPLETE = 2, /* notifier notified when done */ + CEPH_WATCH_EVENT_DISCONNECT = 3, /* we were disconnected */ }; @@ -207,6 +208,8 @@ struct ceph_mon_subscribe_ack { struct ceph_fsid fsid; } __attribute__ ((packed)); +#define CEPH_FS_CLUSTER_ID_NONE -1 + /* * mdsmap flags */ @@ -344,6 +347,18 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_XATTR_REPLACE (1 << 1) #define CEPH_XATTR_REMOVE (1 << 31) +/* + * readdir request flags; + */ +#define CEPH_READDIR_REPLY_BITFLAGS (1<<0) + +/* + * readdir reply flags. + */ +#define CEPH_READDIR_FRAG_END (1<<0) +#define CEPH_READDIR_FRAG_COMPLETE (1<<8) +#define CEPH_READDIR_HASH_ORDER (1<<9) + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -361,6 +376,7 @@ union ceph_mds_request_args { __le32 frag; /* which dir fragment */ __le32 max_entries; /* how many dentries to grab */ __le32 max_bytes; + __le16 flags; } __attribute__ ((packed)) readdir; struct { __le32 mode; diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index a6ef9cc267ec..19e9932f3e77 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -47,7 +47,7 @@ static inline void ceph_decode_copy(void **p, void *pv, size_t n) /* * bounds check input. */ -static inline int ceph_has_room(void **p, void *end, size_t n) +static inline bool ceph_has_room(void **p, void *end, size_t n) { return end >= *p && n <= end - *p; } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index db92a8d4926e..690985daad1c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -180,6 +180,63 @@ static inline int calc_pages_for(u64 off, u64 len) (off >> PAGE_SHIFT); } +/* + * These are not meant to be generic - an integer key is assumed. + */ +#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +static void insert_##name(struct rb_root *root, type *t) \ +{ \ + struct rb_node **n = &root->rb_node; \ + struct rb_node *parent = NULL; \ + \ + BUG_ON(!RB_EMPTY_NODE(&t->nodefld)); \ + \ + while (*n) { \ + type *cur = rb_entry(*n, type, nodefld); \ + \ + parent = *n; \ + if (t->keyfld < cur->keyfld) \ + n = &(*n)->rb_left; \ + else if (t->keyfld > cur->keyfld) \ + n = &(*n)->rb_right; \ + else \ + BUG(); \ + } \ + \ + rb_link_node(&t->nodefld, parent, n); \ + rb_insert_color(&t->nodefld, root); \ +} \ +static void erase_##name(struct rb_root *root, type *t) \ +{ \ + BUG_ON(RB_EMPTY_NODE(&t->nodefld)); \ + rb_erase(&t->nodefld, root); \ + RB_CLEAR_NODE(&t->nodefld); \ +} + +#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +static type *lookup_##name(struct rb_root *root, \ + typeof(((type *)0)->keyfld) key) \ +{ \ + struct rb_node *n = root->rb_node; \ + \ + while (n) { \ + type *cur = rb_entry(n, type, nodefld); \ + \ + if (key < cur->keyfld) \ + n = n->rb_left; \ + else if (key > cur->keyfld) \ + n = n->rb_right; \ + else \ + return cur; \ + } \ + \ + return NULL; \ +} + +#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) + extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index e230e7ed60d3..e2a92df08b47 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -39,20 +39,31 @@ struct ceph_mon_request { ceph_monc_request_func_t do_request; }; +typedef void (*ceph_monc_callback_t)(struct ceph_mon_generic_request *); + /* * ceph_mon_generic_request is being used for the statfs and * mon_get_version requests which are being done a bit differently * because we need to get data back to the caller */ struct ceph_mon_generic_request { + struct ceph_mon_client *monc; struct kref kref; u64 tid; struct rb_node node; int result; - void *buf; + struct completion completion; + ceph_monc_callback_t complete_cb; + u64 private_data; /* r_tid/linger_id */ + struct ceph_msg *request; /* original request */ struct ceph_msg *reply; /* and reply */ + + union { + struct ceph_statfs *st; + u64 newest; + } u; }; struct ceph_mon_client { @@ -77,7 +88,6 @@ struct ceph_mon_client { /* pending generic requests */ struct rb_root generic_request_tree; - int num_generic_requests; u64 last_tid; /* subs, indexed with CEPH_SUB_* */ @@ -86,6 +96,7 @@ struct ceph_mon_client { bool want; u32 have; /* epoch */ } subs[3]; + int fs_cluster_id; /* "mdsmap.<id>" sub */ #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_file; @@ -116,16 +127,18 @@ extern const char *ceph_sub_str[]; bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch, bool continuous); void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch); +void ceph_monc_renew_subs(struct ceph_mon_client *monc); -extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout); extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); -extern int ceph_monc_do_get_version(struct ceph_mon_client *monc, - const char *what, u64 *newest); +int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, + u64 *newest); +int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, + ceph_monc_callback_t cb, u64 private_data); extern int ceph_monc_open_session(struct ceph_mon_client *monc); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cbf460927c42..19b14862d3e0 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -20,10 +20,11 @@ struct ceph_osd_client; /* * completion callback for async writepages */ -typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, - struct ceph_msg *); +typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); +#define CEPH_HOMELESS_OSD -1 + /* a given osd we're communicating with */ struct ceph_osd { atomic_t o_ref; @@ -32,16 +33,15 @@ struct ceph_osd { int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; - struct list_head o_requests; - struct list_head o_linger_requests; + struct rb_root o_requests; + struct rb_root o_linger_requests; struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; - int o_marked_for_keepalive; struct list_head o_keepalive_item; + struct mutex lock; }; - #define CEPH_OSD_SLAB_OPS 2 #define CEPH_OSD_MAX_OPS 16 @@ -104,76 +104,95 @@ struct ceph_osd_req_op { struct ceph_osd_data response_data; __u8 class_len; __u8 method_len; - __u8 argc; + u32 indata_len; } cls; struct { u64 cookie; - u64 ver; - u32 prot_ver; - u32 timeout; - __u8 flag; + __u8 op; /* CEPH_OSD_WATCH_OP_ */ + u32 gen; } watch; struct { + struct ceph_osd_data request_data; + } notify_ack; + struct { + u64 cookie; + struct ceph_osd_data request_data; + struct ceph_osd_data response_data; + } notify; + struct { u64 expected_object_size; u64 expected_write_size; } alloc_hint; }; }; +struct ceph_osd_request_target { + struct ceph_object_id base_oid; + struct ceph_object_locator base_oloc; + struct ceph_object_id target_oid; + struct ceph_object_locator target_oloc; + + struct ceph_pg pgid; + u32 pg_num; + u32 pg_num_mask; + struct ceph_osds acting; + struct ceph_osds up; + int size; + int min_size; + bool sort_bitwise; + + unsigned int flags; /* CEPH_OSD_FLAG_* */ + bool paused; + + int osd; +}; + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; - struct list_head r_req_lru_item; - struct list_head r_osd_item; - struct list_head r_linger_item; - struct list_head r_linger_osd_item; + struct rb_node r_mc_node; /* map check */ struct ceph_osd *r_osd; - struct ceph_pg r_pgid; - int r_pg_osds[CEPH_PG_MAX_SIZE]; - int r_num_pg_osds; + + struct ceph_osd_request_target r_t; +#define r_base_oid r_t.base_oid +#define r_base_oloc r_t.base_oloc +#define r_flags r_t.flags struct ceph_msg *r_request, *r_reply; - int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ /* request osd ops array */ unsigned int r_num_ops; - /* these are updated on each send */ - __le32 *r_request_osdmap_epoch; - __le32 *r_request_flags; - __le64 *r_request_pool; - void *r_request_pgid; - __le32 *r_request_attempts; - bool r_paused; - struct ceph_eversion *r_request_reassert_version; - int r_result; - int r_got_reply; - int r_linger; + bool r_got_reply; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion, r_safe_completion; + struct completion r_completion; + struct completion r_safe_completion; /* fsync waiter */ ceph_osdc_callback_t r_callback; ceph_osdc_unsafe_callback_t r_unsafe_callback; - struct ceph_eversion r_reassert_version; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ void *r_priv; /* ditto */ - struct ceph_object_locator r_base_oloc; - struct ceph_object_id r_base_oid; - struct ceph_object_locator r_target_oloc; - struct ceph_object_id r_target_oid; - - u64 r_snapid; - unsigned long r_stamp; /* send OR check time */ + /* set by submitter */ + u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ + struct ceph_snap_context *r_snapc; /* for writes */ + struct timespec r_mtime; /* ditto */ + u64 r_data_offset; /* ditto */ + bool r_linger; /* don't resend on failure */ - struct ceph_snap_context *r_snapc; /* snap context for writes */ + /* internal */ + unsigned long r_stamp; /* jiffies, send or check time */ + int r_attempts; + struct ceph_eversion r_replay_version; /* aka reassert_version */ + u32 r_last_force_resend; + u32 r_map_dne_bound; struct ceph_osd_req_op r_ops[]; }; @@ -182,44 +201,70 @@ struct ceph_request_redirect { struct ceph_object_locator oloc; }; -struct ceph_osd_event { - u64 cookie; - int one_shot; +typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, + u64 notifier_id, void *data, size_t data_len); +typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); + +struct ceph_osd_linger_request { struct ceph_osd_client *osdc; - void (*cb)(u64, u64, u8, void *); - void *data; - struct rb_node node; - struct list_head osd_node; + u64 linger_id; + bool committed; + bool is_watch; /* watch or notify */ + + struct ceph_osd *osd; + struct ceph_osd_request *reg_req; + struct ceph_osd_request *ping_req; + unsigned long ping_sent; + unsigned long watch_valid_thru; + struct list_head pending_lworks; + + struct ceph_osd_request_target t; + u32 last_force_resend; + u32 map_dne_bound; + + struct timespec mtime; + struct kref kref; -}; + struct mutex lock; + struct rb_node node; /* osd */ + struct rb_node osdc_node; /* osdc */ + struct rb_node mc_node; /* map check */ + struct list_head scan_item; + + struct completion reg_commit_wait; + struct completion notify_finish_wait; + int reg_commit_error; + int notify_finish_error; + int last_error; + + u32 register_gen; + u64 notify_id; + + rados_watchcb2_t wcb; + rados_watcherrcb_t errcb; + void *data; -struct ceph_osd_event_work { - struct work_struct work; - struct ceph_osd_event *event; - u64 ver; - u64 notify_id; - u8 opcode; + struct page ***preply_pages; + size_t *preply_len; }; struct ceph_osd_client { struct ceph_client *client; struct ceph_osdmap *osdmap; /* current map */ - struct rw_semaphore map_sem; - struct completion map_waiters; - u64 last_requested_map; + struct rw_semaphore lock; - struct mutex request_mutex; struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ - u64 timeout_tid; /* tid of timeout triggering rq */ - u64 last_tid; /* tid of last request */ - struct rb_root requests; /* pending requests */ - struct list_head req_lru; /* in-flight lru */ - struct list_head req_unsent; /* unsent/need-resend queue */ - struct list_head req_notarget; /* map to no osd */ - struct list_head req_linger; /* lingering requests */ - int num_requests; + spinlock_t osd_lru_lock; + struct ceph_osd homeless_osd; + atomic64_t last_tid; /* tid of last request */ + u64 last_linger_id; + struct rb_root linger_requests; /* lingering requests */ + struct rb_root map_checks; + struct rb_root linger_map_checks; + atomic_t num_requests; + atomic_t num_homeless; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; #ifdef CONFIG_DEBUG_FS @@ -231,10 +276,6 @@ struct ceph_osd_client { struct ceph_msgpool msgpool_op; struct ceph_msgpool msgpool_op_reply; - spinlock_t event_lock; - struct rb_root event_tree; - u64 event_count; - struct workqueue_struct *notify_wq; }; @@ -271,9 +312,6 @@ extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, extern struct ceph_osd_data *osd_req_op_extent_osd_data( struct ceph_osd_request *osd_req, unsigned int which); -extern struct ceph_osd_data *osd_req_op_cls_response_data( - struct ceph_osd_request *osd_req, - unsigned int which); extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, unsigned int which, @@ -309,9 +347,6 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode); -extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, - u64 cookie, u64 version, int flag); extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, unsigned int which, u64 expected_object_size, @@ -322,11 +357,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * unsigned int num_ops, bool use_mempool, gfp_t gfp_flags); - -extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, - struct ceph_snap_context *snapc, - u64 snap_id, - struct timespec *mtime); +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, @@ -338,9 +369,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); -extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); - extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -353,6 +381,7 @@ extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, extern void ceph_osdc_sync(struct ceph_osd_client *osdc); extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); +void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, @@ -371,11 +400,33 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct timespec *mtime, struct page **pages, int nr_pages); -/* watch/notify events */ -extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, - void (*event_cb)(u64, u64, u8, void *), - void *data, struct ceph_osd_event **pevent); -extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern void ceph_osdc_put_event(struct ceph_osd_event *event); +/* watch/notify */ +struct ceph_osd_linger_request * +ceph_osdc_watch(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + rados_watchcb2_t wcb, + rados_watcherrcb_t errcb, + void *data); +int ceph_osdc_unwatch(struct ceph_osd_client *osdc, + struct ceph_osd_linger_request *lreq); + +int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + u64 notify_id, + u64 cookie, + void *payload, + size_t payload_len); +int ceph_osdc_notify(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + void *payload, + size_t payload_len, + u32 timeout, + struct page ***preply_pages, + size_t *preply_len); +int ceph_osdc_watch_check(struct ceph_osd_client *osdc, + struct ceph_osd_linger_request *lreq); #endif diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e55c08bc3a96..ddc426b22d81 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -24,21 +24,29 @@ struct ceph_pg { uint32_t seed; }; -#define CEPH_POOL_FLAG_HASHPSPOOL 1 +int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); + +#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id + together */ +#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */ struct ceph_pg_pool_info { struct rb_node node; s64 id; - u8 type; + u8 type; /* CEPH_POOL_TYPE_* */ u8 size; + u8 min_size; u8 crush_ruleset; u8 object_hash; + u32 last_force_request_resend; u32 pg_num, pgp_num; int pg_num_mask, pgp_num_mask; s64 read_tier; s64 write_tier; /* wins for read+write ops */ - u64 flags; + u64 flags; /* CEPH_POOL_FLAG_* */ char *name; + + bool was_full; /* for handle_one_map() */ }; static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) @@ -57,6 +65,22 @@ struct ceph_object_locator { s64 pool; }; +static inline void ceph_oloc_init(struct ceph_object_locator *oloc) +{ + oloc->pool = -1; +} + +static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc) +{ + return oloc->pool == -1; +} + +static inline void ceph_oloc_copy(struct ceph_object_locator *dest, + const struct ceph_object_locator *src) +{ + dest->pool = src->pool; +} + /* * Maximum supported by kernel client object name length * @@ -64,11 +88,47 @@ struct ceph_object_locator { */ #define CEPH_MAX_OID_NAME_LEN 100 +/* + * 51-char inline_name is long enough for all cephfs and all but one + * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be + * arbitrarily long (~PAGE_SIZE). It's done once during rbd map; all + * other rbd requests fit into inline_name. + * + * Makes ceph_object_id 64 bytes on 64-bit. + */ +#define CEPH_OID_INLINE_LEN 52 + +/* + * Both inline and external buffers have space for a NUL-terminator, + * which is carried around. It's not required though - RADOS object + * names don't have to be NUL-terminated and may contain NULs. + */ struct ceph_object_id { - char name[CEPH_MAX_OID_NAME_LEN]; + char *name; + char inline_name[CEPH_OID_INLINE_LEN]; int name_len; }; +static inline void ceph_oid_init(struct ceph_object_id *oid) +{ + oid->name = oid->inline_name; + oid->name_len = 0; +} + +static inline bool ceph_oid_empty(const struct ceph_object_id *oid) +{ + return oid->name == oid->inline_name && !oid->name_len; +} + +void ceph_oid_copy(struct ceph_object_id *dest, + const struct ceph_object_id *src); +__printf(2, 3) +void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...); +__printf(3, 4) +int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, + const char *fmt, ...); +void ceph_oid_destroy(struct ceph_object_id *oid); + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -87,7 +147,6 @@ struct ceph_pg_mapping { struct ceph_osdmap { struct ceph_fsid fsid; u32 epoch; - u32 mkfs_epoch; struct ceph_timespec created, modified; u32 flags; /* CEPH_OSDMAP_* */ @@ -113,43 +172,19 @@ struct ceph_osdmap { int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; -static inline void ceph_oid_set_name(struct ceph_object_id *oid, - const char *name) -{ - int len; - - len = strlen(name); - if (len > sizeof(oid->name)) { - WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n", - name, len, sizeof(oid->name)); - len = sizeof(oid->name); - } - - memcpy(oid->name, name, len); - oid->name_len = len; -} - -static inline void ceph_oid_copy(struct ceph_object_id *dest, - struct ceph_object_id *src) -{ - BUG_ON(src->name_len > sizeof(dest->name)); - memcpy(dest->name, src->name, src->name_len); - dest->name_len = src->name_len; -} - -static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) { return osd >= 0 && osd < map->max_osd && (map->osd_state[osd] & CEPH_OSD_EXISTS); } -static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_is_up(struct ceph_osdmap *map, int osd) { return ceph_osd_exists(map, osd) && (map->osd_state[osd] & CEPH_OSD_UP); } -static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) { return !ceph_osd_is_up(map, osd); } @@ -192,28 +227,59 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) return 0; } +struct ceph_osdmap *ceph_osdmap_alloc(void); extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); -extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, - struct ceph_osdmap *map, - struct ceph_messenger *msgr); +struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, + struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map); +struct ceph_osds { + int osds[CEPH_PG_MAX_SIZE]; + int size; + int primary; /* id, NOT index */ +}; + +static inline void ceph_osds_init(struct ceph_osds *set) +{ + set->size = 0; + set->primary = -1; +} + +void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src); + +bool ceph_is_new_interval(const struct ceph_osds *old_acting, + const struct ceph_osds *new_acting, + const struct ceph_osds *old_up, + const struct ceph_osds *new_up, + int old_size, + int new_size, + int old_min_size, + int new_min_size, + u32 old_pg_num, + u32 new_pg_num, + bool old_sort_bitwise, + bool new_sort_bitwise, + const struct ceph_pg *pgid); +bool ceph_osds_changed(const struct ceph_osds *old_acting, + const struct ceph_osds *new_acting, + bool any_change); + /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); -/* calculate mapping of object to a placement group */ -extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_locator *oloc, - struct ceph_object_id *oid, - struct ceph_pg *pg_out); - -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, - struct ceph_pg pgid, - int *osds, int *primary); -extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg pgid); +int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); + +void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid, + struct ceph_osds *up, + struct ceph_osds *acting); +int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid); extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2f822dca1046..5c0da61cb763 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -114,8 +114,8 @@ struct ceph_object_layout { * compound epoch+version, used by storage layer to serialize mutations */ struct ceph_eversion { - __le32 epoch; __le64 version; + __le32 epoch; } __attribute__ ((packed)); /* @@ -153,6 +153,11 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ #define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ #define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ +#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */ +#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */ +#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ +#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ +#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ /* * The error code to return when an OSD can't handle a write @@ -389,6 +394,13 @@ enum { CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */ CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */ CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */ + CEPH_OSD_FLAG_MAP_SNAP_CLONE = 0x80000, /* map snap direct to clone id */ + CEPH_OSD_FLAG_ENFORCE_SNAPC = 0x100000, /* use snapc provided even if + pool uses pool snaps */ + CEPH_OSD_FLAG_REDIRECTED = 0x200000, /* op has been redirected */ + CEPH_OSD_FLAG_KNOWN_REDIR = 0x400000, /* redirect bit is authoritative */ + CEPH_OSD_FLAG_FULL_TRY = 0x800000, /* try op despite full flag */ + CEPH_OSD_FLAG_FULL_FORCE = 0x1000000, /* force op despite full flag */ }; enum { @@ -415,7 +427,17 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; -#define RADOS_NOTIFY_VER 1 +enum { + CEPH_OSD_WATCH_OP_UNWATCH = 0, + CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1, + /* note: use only ODD ids to prevent pre-giant code from + interpreting the op as UNWATCH */ + CEPH_OSD_WATCH_OP_WATCH = 3, + CEPH_OSD_WATCH_OP_RECONNECT = 5, + CEPH_OSD_WATCH_OP_PING = 7, +}; + +const char *ceph_osd_watch_op_name(int o); /* * an individual object operation. each may be accompanied by some data @@ -450,10 +472,14 @@ struct ceph_osd_op { } __attribute__ ((packed)) snap; struct { __le64 cookie; - __le64 ver; - __u8 flag; /* 0 = unwatch, 1 = watch */ + __le64 ver; /* no longer used */ + __u8 op; /* CEPH_OSD_WATCH_OP_* */ + __le32 gen; /* registration generation */ } __attribute__ ((packed)) watch; struct { + __le64 cookie; + } __attribute__ ((packed)) notify; + struct { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; diff --git a/include/linux/dax.h b/include/linux/dax.h index 982a6c4a62f3..43d5f0b799c7 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -3,45 +3,62 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/radix-tree.h> #include <asm/pgtable.h> +/* We use lowest available exceptional entry bit for locking */ +#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) + ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +void dax_wake_mapping_entry_waiter(struct address_space *mapping, + pgoff_t index, bool wake_all); #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); +void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index); +int __dax_zero_page_range(struct block_device *bdev, sector_t sector, + unsigned int offset, unsigned int length); #else static inline struct page *read_dax_sector(struct block_device *bdev, sector_t n) { return ERR_PTR(-ENXIO); } +/* Shouldn't ever be called when dax is disabled. */ +static inline void dax_unlock_mapping_entry(struct address_space *mapping, + pgoff_t index) +{ + BUG(); +} +static inline int __dax_zero_page_range(struct block_device *bdev, + sector_t sector, unsigned int offset, unsigned int length) +{ + return -ENXIO; +} #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t, dax_iodone_t); + unsigned int flags, get_block_t); int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t, dax_iodone_t); + unsigned int flags, get_block_t); #else static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags, get_block_t gb, - dax_iodone_t di) + pmd_t *pmd, unsigned int flags, get_block_t gb) { return VM_FAULT_FALLBACK; } #define __dax_pmd_fault dax_pmd_fault #endif int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) -#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) +#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) +#define __dax_mkwrite(vma, vmf, gb) __dax_fault(vma, vmf, gb) static inline bool vma_is_dax(struct vm_area_struct *vma) { diff --git a/include/linux/err.h b/include/linux/err.h index 56762ab41713..1e3558845e4c 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -18,7 +18,7 @@ #ifndef __ASSEMBLY__ -#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) +#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) { diff --git a/include/linux/errno.h b/include/linux/errno.h index 89627b9187f9..7ce9fb1b7d28 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -28,5 +28,6 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ +#define ERECALLCONFLICT 530 /* conflict with recalled state */ #endif diff --git a/include/linux/export.h b/include/linux/export.h index 96e45ea463e7..2f9ccbe6a639 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -38,7 +38,7 @@ extern struct module __this_module; #ifdef CONFIG_MODULES -#ifndef __GENKSYMS__ +#if defined(__KERNEL__) && !defined(__GENKSYMS__) #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ @@ -53,7 +53,7 @@ extern struct module __this_module; #endif /* For every exported symbol, place a struct in the __ksymtab section */ -#define __EXPORT_SYMBOL(sym, sec) \ +#define ___EXPORT_SYMBOL(sym, sec) \ extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ @@ -65,6 +65,35 @@ extern struct module __this_module; __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } +#if defined(__KSYM_DEPS__) + +/* + * For fine grained build dependencies, we want to tell the build system + * about each possible exported symbol even if they're not actually exported. + * We use a string pattern that is unlikely to be valid code that the build + * system filters out from the preprocessor output (see ksym_dep_filter + * in scripts/Kbuild.include). + */ +#define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === + +#elif defined(CONFIG_TRIM_UNUSED_KSYMS) + +#include <linux/kconfig.h> +#include <generated/autoksyms.h> + +#define __EXPORT_SYMBOL(sym, sec) \ + __cond_export_sym(sym, sec, config_enabled(__KSYM_##sym)) +#define __cond_export_sym(sym, sec, conf) \ + ___cond_export_sym(sym, sec, conf) +#define ___cond_export_sym(sym, sec, enabled) \ + __cond_export_sym_##enabled(sym, sec) +#define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec) +#define __cond_export_sym_0(sym, sec) /* nothing */ + +#else +#define __EXPORT_SYMBOL ___EXPORT_SYMBOL +#endif + #define EXPORT_SYMBOL(sym) \ __EXPORT_SYMBOL(sym, "") diff --git a/include/linux/fs.h b/include/linux/fs.h index 62bdb0a6cf2d..dd288148a6b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,7 +74,6 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); -typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 diff --git a/include/linux/iova.h b/include/linux/iova.h index 92f7177db2ce..f27bb2c62fca 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -19,8 +19,21 @@ /* iova structure */ struct iova { struct rb_node node; - unsigned long pfn_hi; /* IOMMU dish out addr hi */ - unsigned long pfn_lo; /* IOMMU dish out addr lo */ + unsigned long pfn_hi; /* Highest allocated pfn */ + unsigned long pfn_lo; /* Lowest allocated pfn */ +}; + +struct iova_magazine; +struct iova_cpu_rcache; + +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ +#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ + +struct iova_rcache { + spinlock_t lock; + unsigned long depot_size; + struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_cpu_rcache __percpu *cpu_rcaches; }; /* holds all the iova translations for a domain */ @@ -31,6 +44,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ }; static inline unsigned long iova_size(struct iova *iova) @@ -78,6 +92,10 @@ void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned); +void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, + unsigned long size); +unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -87,5 +105,6 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); +void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #endif diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 9e6fdd33bdb2..bfbd707de390 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -273,6 +273,12 @@ #define ICH_LR_ACTIVE_BIT (1ULL << 63) #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 9c940263ca23..fd051855539b 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -33,6 +33,7 @@ #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 +#define GIC_DIST_IIDR 0x008 #define GIC_DIST_IGROUP 0x080 #define GIC_DIST_ENABLE_SET 0x100 #define GIC_DIST_ENABLE_CLEAR 0x180 @@ -76,6 +77,7 @@ #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) #define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_PRIORITY_SHIFT 23 #define GICH_LR_STATE (3 << 28) #define GICH_LR_PENDING_BIT (1 << 28) #define GICH_LR_ACTIVE_BIT (1 << 29) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b1fa8f11c95b..1c9c973a7dd9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -412,6 +412,8 @@ struct kvm { #endif long tlbs_dirty; struct list_head devices; + struct dentry *debugfs_dentry; + struct kvm_stat_data **debugfs_stat_data; }; #define kvm_err(fmt, ...) \ @@ -991,6 +993,11 @@ enum kvm_stat_kind { KVM_STAT_VCPU, }; +struct kvm_stat_data { + int offset; + struct kvm *kvm; +}; + struct kvm_stats_debugfs_item { const char *name; int offset; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 20d8a5d4d133..5145620ba48a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -182,7 +182,7 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE -extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +extern void __init register_page_bootmem_info_node(struct pglist_data *pgdat); #else static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 2835d598d258..5df5feb49575 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -303,6 +303,12 @@ struct vm_fault { * is set (which is also implied by * VM_FAULT_ERROR). */ + void *entry; /* ->fault handler can alternatively + * return locked DAX entry. In that + * case handler should return + * VM_FAULT_DAX_LOCKED and fill in + * entry here. + */ /* for ->map_pages() only */ pgoff_t max_pgoff; /* map pages for offset from pgoff till * max_pgoff inclusive */ @@ -1076,6 +1082,7 @@ static inline void clear_page_pfmemalloc(struct page *page) #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ +#define VM_FAULT_DAX_LOCKED 0x1000 /* ->fault has locked DAX entry */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ @@ -2011,7 +2018,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif /* These take the mm semaphore themselves */ -extern unsigned long __must_check vm_brk(unsigned long, unsigned long); +extern int __must_check vm_brk(unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d553855503e6..ca3e517980a0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -514,7 +514,9 @@ struct mm_struct { #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif +#ifdef CONFIG_MMU struct work_struct async_put_work; +#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 85800b48241f..45cde8cd39f2 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -329,6 +329,7 @@ struct mmc_host { unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ + unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ @@ -526,4 +527,7 @@ static inline void mmc_retune_recheck(struct mmc_host *host) host->retune_now = 1; } +void mmc_retune_pause(struct mmc_host *host); +void mmc_retune_unpause(struct mmc_host *host); + #endif /* LINUX_MMC_HOST_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 011433478a14..bfed6b367350 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -50,12 +50,27 @@ struct nfs4_label { typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; -struct nfs_stateid4 { - __be32 seqid; - char other[NFS4_STATEID_OTHER_SIZE]; -} __attribute__ ((packed)); +struct nfs4_stateid_struct { + union { + char data[NFS4_STATEID_SIZE]; + struct { + __be32 seqid; + char other[NFS4_STATEID_OTHER_SIZE]; + } __attribute__ ((packed)); + }; + + enum { + NFS4_INVALID_STATEID_TYPE = 0, + NFS4_SPECIAL_STATEID_TYPE, + NFS4_OPEN_STATEID_TYPE, + NFS4_LOCK_STATEID_TYPE, + NFS4_DELEGATION_STATEID_TYPE, + NFS4_LAYOUT_STATEID_TYPE, + NFS4_PNFS_DS_STATEID_TYPE, + } type; +}; -typedef struct nfs_stateid4 nfs4_stateid; +typedef struct nfs4_stateid_struct nfs4_stateid; enum nfs_opnum4 { OP_ACCESS = 3, @@ -504,6 +519,7 @@ enum { NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, + NFSPROC4_CLNT_COPY, }; /* nfs41 types */ @@ -621,7 +637,9 @@ enum pnfs_update_layout_reason { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, PNFS_UPDATE_LAYOUT_FOUND_CACHED, PNFS_UPDATE_LAYOUT_RETURN, + PNFS_UPDATE_LAYOUT_RETRY, PNFS_UPDATE_LAYOUT_BLOCKED, + PNFS_UPDATE_LAYOUT_INVALID_OPEN, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, }; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7fcc13c8cf1f..14a762d2734d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,5 +246,6 @@ struct nfs_server { #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) #define NFS_CAP_CLONE (1U << 23) +#define NFS_CAP_COPY (1U << 24) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ee8491dadbf3..c304a11b5b1a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,7 +233,6 @@ struct nfs4_layoutget_args { struct inode *inode; struct nfs_open_context *ctx; nfs4_stateid stateid; - unsigned long timestamp; struct nfs4_layoutdriver_data layout; }; @@ -251,7 +250,6 @@ struct nfs4_layoutget { struct nfs4_layoutget_res res; struct rpc_cred *cred; gfp_t gfp_flags; - long timeout; }; struct nfs4_getdeviceinfo_args { @@ -1343,6 +1341,32 @@ struct nfs42_falloc_res { const struct nfs_server *falloc_server; }; +struct nfs42_copy_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *src_fh; + nfs4_stateid src_stateid; + u64 src_pos; + + struct nfs_fh *dst_fh; + nfs4_stateid dst_stateid; + u64 dst_pos; + + u64 count; +}; + +struct nfs42_write_res { + u64 count; + struct nfs_writeverf verifier; +}; + +struct nfs42_copy_res { + struct nfs4_sequence_res seq_res; + struct nfs42_write_res write_res; + bool consecutive; + bool synchronous; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; @@ -1431,7 +1455,7 @@ struct nfs_commit_completion_ops { }; struct nfs_commit_info { - spinlock_t *lock; /* inode->i_lock */ + struct inode *inode; /* Needed for inode->i_lock */ struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; struct nfs_direct_req *dreq; /* O_DIRECT request */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 44f33834ad78..1a827cecd62f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,6 +61,14 @@ struct perf_callchain_entry { __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; +struct perf_callchain_entry_ctx { + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; +}; + struct perf_raw_record { u32 size; void *data; @@ -1061,20 +1069,36 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, - bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; + +static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) +{ + if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { + struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; + ++ctx->contexts; + return 0; + } else { + ctx->contexts_maxed = true; + return -1; /* no more room, stop walking the stack */ + } +} -static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (entry->nr < sysctl_perf_event_max_stack) { + if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { + struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; + ++ctx->nr; return 0; } else { return -1; /* no more room, stop walking the stack */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 21c26e78aec5..6e42ada26345 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1539,6 +1539,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; unsigned :0; /* force alignment to the next boundary */ /* unserialized, strictly 'current' */ @@ -2744,10 +2745,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -/* same as above but performs the slow path from the async kontext. Can +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ extern void mmput_async(struct mm_struct *); +#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e0582106ef4f..7973a821ac58 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,7 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s->sequence); + return lockless_dereference(s)->sequence; } /** @@ -331,7 +331,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch->seq); + * seq = lockless_dereference(latch)->seq; * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 665cd0cd18b8..d1faa019c02a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -111,22 +111,6 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif - -/** - * virt_to_obj - returns address of the beginning of object. - * @s: object's kmem_cache - * @slab_page: address of slab page - * @x: address within object memory range - * - * Returns address of the beginning of object - */ -static inline void *virt_to_obj(struct kmem_cache *s, - const void *slab_page, - const void *x) -{ - return (void *)x - ((x - slab_page) % s->size); -} - void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason); diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6a241a277249..899791573a40 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -127,7 +127,7 @@ struct rpc_authops { void (*destroy)(struct rpc_auth *); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); - struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, @@ -167,6 +167,7 @@ void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); struct rpc_cred * rpc_lookup_cred_nonblock(void); +struct rpc_cred * rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); @@ -178,7 +179,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, int rpcauth_get_gssinfo(rpc_authflavor_t, struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); -struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); @@ -201,9 +202,28 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) { - atomic_inc(&cred->cr_count); + if (cred != NULL) + atomic_inc(&cred->cr_count); return cred; } +/** + * get_rpccred_rcu - get a reference to a cred using rcu-protected pointer + * @cred: cred of which to take a reference + * + * In some cases, we may have a pointer to a credential to which we + * want to take a reference, but don't already have one. Because these + * objects are freed using RCU, we can access the cr_count while its + * on its way to destruction and only take a reference if it's not already + * zero. + */ +static inline struct rpc_cred * +get_rpccred_rcu(struct rpc_cred *cred) +{ + if (atomic_inc_not_zero(&cred->cr_count)) + return cred; + return NULL; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 9a7ddbaf116e..19c659d1c0f8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -176,6 +176,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); +size_t rpc_max_bc_payload(struct rpc_clnt *); unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 807371357160..59cbf16eaeb5 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr; /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fb0d212e0d3a..5aa3834619a8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -142,6 +142,7 @@ struct rpc_xprt_ops { int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); int (*bc_up)(struct svc_serv *serv, struct net *net); + size_t (*bc_maxpayload)(struct rpc_xprt *xprt); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 767190b01363..39267dc3486a 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -52,7 +52,9 @@ #define RPCRDMA_DEF_SLOT_TABLE (128U) #define RPCRDMA_MAX_SLOT_TABLE (256U) -#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ +#define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ +#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1b8a5a7876ce..e45abe7db9a6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -340,6 +340,7 @@ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); int (*set_emul_temp)(void *, int); + int (*set_trip_temp)(void *, int, int); }; /** diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 526fb3d2e43a..f28292d73ddb 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -108,7 +108,7 @@ TRACE_EVENT(kvm_ioapic_set_irq, __entry->coalesced = coalesced; ), - TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s", + TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", @@ -129,7 +129,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, __entry->e = e; ), - TP_printk("dst %x vec=%u (%s|%s|%s%s)", + TP_printk("dst %x vec %u (%s|%s|%s%s)", (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", @@ -151,7 +151,7 @@ TRACE_EVENT(kvm_msi_set_irq, __entry->data = data; ), - TP_printk("dst %u vec %x (%s|%s|%s%s)", + TP_printk("dst %u vec %u (%s|%s|%s%s)", (u8)(__entry->address >> 12), (u8)__entry->data, __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), (__entry->address & (1<<2)) ? "logical" : "physical", diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index c4b2a3f90829..50ff21f748b6 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -61,5 +61,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) #define NVME_IOCTL_RESET _IO('N', 0x44) #define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) +#define NVME_IOCTL_RESCAN _IO('N', 0x46) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 43fc8d213472..36ce552cf6a9 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -862,6 +862,7 @@ enum perf_event_type { }; #define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index ad66589f2ae6..3a2a79401789 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -16,6 +16,7 @@ #include <linux/videodev2.h> #include <linux/bitmap.h> #include <linux/fb.h> +#include <linux/of.h> #include <media/v4l2-mediabus.h> #include <video/videomode.h> @@ -345,6 +346,7 @@ struct ipu_client_platformdata { int dc; int dp; int dma[2]; + struct device_node *of_node; }; #endif /* __DRM_IPU_H__ */ |