diff options
Diffstat (limited to 'include')
236 files changed, 4692 insertions, 3626 deletions
diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h index a0212e67d6f4..027faa8883aa 100644 --- a/include/acpi/acpi_io.h +++ b/include/acpi/acpi_io.h @@ -14,14 +14,6 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, } #endif -#ifndef acpi_os_memmap -static inline void __iomem *acpi_os_memmap(acpi_physical_address phys, - acpi_size size) -{ - return ioremap_cache(phys, size); -} -#endif - extern bool acpi_permanent_mmap; void __iomem __ref diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 4dec4ed138cd..73e806fe7ce7 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -9,18 +9,27 @@ #include <linux/mailbox_controller.h> #include <linux/mailbox_client.h> +struct pcc_mbox_chan { + struct mbox_chan *mchan; + u64 shmem_base_addr; + u64 shmem_size; + u32 latency; + u32 max_access_rate; + u16 min_turnaround_time; +}; + #define MAX_PCC_SUBSPACES 256 #ifdef CONFIG_PCC -extern struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, - int subspace_id); -extern void pcc_mbox_free_channel(struct mbox_chan *chan); +extern struct pcc_mbox_chan * +pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id); +extern void pcc_mbox_free_channel(struct pcc_mbox_chan *chan); #else -static inline struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, - int subspace_id) +static inline struct pcc_mbox_chan * +pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) { return ERR_PTR(-ENODEV); } -static inline void pcc_mbox_free_channel(struct mbox_chan *chan) { } +static inline void pcc_mbox_free_channel(struct pcc_mbox_chan *chan) { } #endif #endif /* _PCC_H */ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index fb172a03a753..20ecb004f5a4 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -22,9 +22,14 @@ typedef __builtin_va_list va_list; #define va_arg(v, l) __builtin_va_arg(v, l) #define va_copy(d, s) __builtin_va_copy(d, s) #else +#ifdef __KERNEL__ #include <linux/stdarg.h> -#endif -#endif +#else +/* Used to build acpi tools */ +#include <stdarg.h> +#endif /* __KERNEL__ */ +#endif /* ACPI_USE_BUILTIN_STDARG */ +#endif /* ! va_arg */ #define ACPI_INLINE __inline__ diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index 4a674db4e1fa..fedc0dfa4877 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma, static inline void flush_dcache_page(struct page *page) { } + +static inline void flush_dcache_folio(struct folio *folio) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #endif +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif #ifndef flush_dcache_mmap_lock static inline void flush_dcache_mmap_lock(struct address_space *mapping) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b9..7ce93aaf69f8 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -957,7 +957,7 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size) #ifndef iounmap #define iounmap iounmap -static inline void iounmap(void __iomem *addr) +static inline void iounmap(volatile void __iomem *addr) { } #endif @@ -1023,16 +1023,7 @@ static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) port &= IO_SPACE_LIMIT; return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port; } -#define __pci_ioport_unmap __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) -{ - uintptr_t start = (uintptr_t) PCI_IOBASE; - uintptr_t addr = (uintptr_t) p; - - if (addr >= start && addr < start + IO_SPACE_LIMIT) - return; - iounmap(p); -} +#define ARCH_HAS_GENERIC_IOPORT_MAP #endif #ifndef ioport_unmap @@ -1048,21 +1039,10 @@ extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_HAS_IOPORT_MAP */ #ifndef CONFIG_GENERIC_IOMAP -struct pci_dev; -extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); - -#ifndef __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) {} -#endif - #ifndef pci_iounmap -#define pci_iounmap pci_iounmap -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) -{ - __pci_ioport_unmap(p); -} +#define ARCH_WANTS_GENERIC_PCI_IOUNMAP +#endif #endif -#endif /* CONFIG_GENERIC_IOMAP */ #ifndef xlate_dev_mem_ptr #define xlate_dev_mem_ptr xlate_dev_mem_ptr diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 9b3eb6d86200..08237ae8b840 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -110,16 +110,6 @@ static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size) } #endif -#ifdef CONFIG_PCI -/* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ -struct pci_dev; -extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -#elif defined(CONFIG_GENERIC_IOMAP) -struct pci_dev; -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ } -#endif - #include <asm-generic/pci_iomap.h> #endif diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c1ab6a6e72b5..d3eae6cdbacb 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -197,10 +197,12 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) +static inline int __cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus, + bool exclude_self) { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + int this_cpu = smp_processor_id(); /* valid_bank_mask can represent up to 64 banks */ if (hv_max_vp_index / 64 >= 64) @@ -218,6 +220,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, * Some banks may end up being empty but this is acceptable. */ for_each_cpu(cpu, cpus) { + if (exclude_self && cpu == this_cpu) + continue; vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) return -1; @@ -232,6 +236,19 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, return nr_bank; } +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + return __cpumask_to_vpset(vpset, cpus, false); +} + +static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + WARN_ON_ONCE(preemptible()); + return __cpumask_to_vpset(vpset, cpus, true); +} + void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h index df636c6d8e6c..5a2f9bf53384 100644 --- a/include/asm-generic/pci_iomap.h +++ b/include/asm-generic/pci_iomap.h @@ -18,6 +18,7 @@ extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, extern void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, unsigned long offset, unsigned long maxlen); +extern void pci_iounmap(struct pci_dev *dev, void __iomem *); /* Create a virtual mapping cookie for a port on a given PCI device. * Do not call this directly, it exists to make it easier for architectures * to override */ @@ -50,6 +51,8 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, { return NULL; } +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ } #endif #endif /* __ASM_GENERIC_PCI_IOMAP_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa50bf2959fe..8771c435f34b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -116,11 +116,7 @@ * GCC 4.5 and later have a 32 bytes section alignment for structures. * Except GCC 4.9, that feels the need to align on 64 bytes. */ -#if __GNUC__ == 4 && __GNUC_MINOR__ == 9 -#define STRUCT_ALIGNMENT 64 -#else #define STRUCT_ALIGNMENT 32 -#endif #define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT) /* @@ -168,16 +164,22 @@ * Need to also make ftrace_stub_graph point to ftrace_stub * so that the same stub location may have different protocols * and not mess up with C verifiers. + * + * ftrace_ops_list_func will be defined as arch_ftrace_ops_list_func + * as some archs will have a different prototype for that function + * but ftrace_ops_list_func() will have a single prototype. */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; \ - ftrace_stub_graph = ftrace_stub; + ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; #else # ifdef CONFIG_FUNCTION_TRACER -# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; +# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; # else # define MCOUNT_REC() # endif diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 73c7139c866f..e715bdb720d5 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -24,7 +24,7 @@ enum arch_timer_reg { ARCH_TIMER_REG_CTRL, - ARCH_TIMER_REG_TVAL, + ARCH_TIMER_REG_CVAL, }; enum arch_timer_ppi_nr { diff --git a/include/kunit/test.h b/include/kunit/test.h index 24b40e5c160b..018e776a34b9 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -613,7 +613,7 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res); * and is automatically cleaned up after the test case concludes. See &struct * kunit_resource for more information. */ -void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags); +void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp); /** * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. @@ -657,9 +657,9 @@ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp) * * See kcalloc() and kunit_kmalloc_array() for more information. */ -static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags) +static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp) { - return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO); + return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } void kunit_cleanup(struct kunit *test); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 864b9997efb2..90f21898aad8 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); -int kvm_pmu_probe_pmuver(void); #else struct kvm_pmu { }; @@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return 0; } -static inline int kvm_pmu_probe_pmuver(void) { return 0xf; } - #endif #endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 974d497a897d..fbc2146050a4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1353,6 +1353,7 @@ static inline int lpit_read_residency_count_address(u64 *address) #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); +int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); @@ -1365,6 +1366,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } +static inline int find_acpi_cpu_topology_cluster(unsigned int cpu) +{ + return -EINVAL; +} static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 71881a2b6f78..5deaddbd7927 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -15,6 +15,10 @@ struct inode; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); +struct file *anon_inode_getfile_secure(const char *name, + const struct file_operations *fops, + void *priv, int flags, + const struct inode *context_inode); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); int anon_inode_getfd_secure(const char *name, diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h new file mode 100644 index 000000000000..720fbb70294a --- /dev/null +++ b/include/linux/apple-mailbox.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Apple mailbox message format + * + * Copyright (C) 2021 The Asahi Linux Contributors + */ + +#ifndef _LINUX_APPLE_MAILBOX_H_ +#define _LINUX_APPLE_MAILBOX_H_ + +#include <linux/types.h> + +/* encodes a single 96bit message sent over the single channel */ +struct apple_mbox_msg { + u64 msg0; + u32 msg1; +}; + +#endif diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index f180240dc95f..b97cea83b25e 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -62,10 +62,12 @@ void topology_set_thermal_pressure(const struct cpumask *cpus, struct cpu_topology { int thread_id; int core_id; + int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; + cpumask_t cluster_sibling; cpumask_t llc_sibling; }; @@ -73,13 +75,16 @@ struct cpu_topology { extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) +#define topology_cluster_id(cpu) (cpu_topology[cpu].cluster_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling) #define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +const struct cpumask *cpu_clustergroup_mask(int cpu); void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7d1cabe15262..63ccb5252190 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); * from register 0 to 3 on return from the SMC instruction. An optional * quirk structure provides vendor specific behavior. */ +#ifdef CONFIG_HAVE_ARM_SMCCC asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); +#else +static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk) +{ + *res = (struct arm_smccc_res){}; +} +#endif /** * __arm_smccc_hvc() - make HVC calls diff --git a/include/linux/ata.h b/include/linux/ata.h index 1b44f40c7700..199e47e97d64 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -329,6 +329,7 @@ enum { ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, diff --git a/include/linux/audit.h b/include/linux/audit.h index 0e0eb4c0fa10..d06134ac6245 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -287,7 +287,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t) /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); +extern int audit_alloc_kernel(struct task_struct *task); extern void __audit_free(struct task_struct *task); +extern void __audit_uring_entry(u8 op); +extern void __audit_uring_exit(int success, long code); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); @@ -324,6 +327,21 @@ static inline void audit_free(struct task_struct *task) if (unlikely(task->audit_context)) __audit_free(task); } +static inline void audit_uring_entry(u8 op) +{ + /* + * We intentionally check audit_context() before audit_enabled as most + * Linux systems (as of ~2021) rely on systemd which forces audit to + * be enabled regardless of the user's audit configuration. + */ + if (unlikely(audit_context() && audit_enabled)) + __audit_uring_entry(op); +} +static inline void audit_uring_exit(int success, long code) +{ + if (unlikely(!audit_dummy_context())) + __audit_uring_exit(success, code); +} static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) @@ -562,8 +580,16 @@ static inline int audit_alloc(struct task_struct *task) { return 0; } +static inline int audit_alloc_kernel(struct task_struct *task) +{ + return 0; +} static inline void audit_free(struct task_struct *task) { } +static inline void audit_uring_entry(u8 op) +{ } +static inline void audit_uring_exit(int success, long code) +{ } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ac7f231b8825..9c14f0a8dbe5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,13 +12,13 @@ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/sched.h> -#include <linux/blkdev.h> #include <linux/device.h> #include <linux/writeback.h> -#include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> +struct blkcg; + static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) return atomic_long_read(&bdi->tot_write_bandwidth); } -static inline void __add_wb_stat(struct bdi_writeback *wb, +static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); @@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, 1); + wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, -1); + wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) @@ -133,20 +133,7 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) return test_bit(WB_writeback_running, &wb->state); } -static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) -{ - struct super_block *sb; - - if (!inode) - return &noop_backing_dev_info; - - sb = inode->i_sb; -#ifdef CONFIG_BLOCK - if (sb_is_blkdev_sb(sb)) - return I_BDEV(inode)->bd_disk->bdi; -#endif - return sb->s_bdi; -} +struct backing_dev_info *inode_to_bdi(struct inode *inode); static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 00952e92eae1..fe6bdfbbef66 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -6,19 +6,10 @@ #define __LINUX_BIO_H #include <linux/mempool.h> -#include <linux/ioprio.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #include <linux/uio.h> -#define BIO_DEBUG - -#ifdef BIO_DEBUG -#define BIO_BUG_ON BUG_ON -#else -#define BIO_BUG_ON -#endif - #define BIO_MAX_VECS 256U static inline unsigned int bio_max_segs(unsigned int nr_segs) @@ -78,22 +69,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio) bio_op(bio) == REQ_OP_WRITE_ZEROES; } -static inline bool bio_mergeable(struct bio *bio) -{ - if (bio->bi_opf & REQ_NOMERGE_FLAGS) - return false; - - return true; -} - -static inline unsigned int bio_cur_bytes(struct bio *bio) -{ - if (bio_has_data(bio)) - return bio_iovec(bio).bv_len; - else /* dataless requests such as discard */ - return bio->bi_iter.bi_size; -} - static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) @@ -102,25 +77,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -/** - * bio_full - check if the bio is full - * @bio: bio to check - * @len: length of one segment to be added - * - * Return true if @bio is full and one segment with @len bytes can't be - * added to the bio, otherwise return false - */ -static inline bool bio_full(struct bio *bio, unsigned len) -{ - if (bio->bi_vcnt >= bio->bi_max_vecs) - return true; - - if (bio->bi_iter.bi_size > UINT_MAX - len) - return true; - - return false; -} - static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) { @@ -163,6 +119,28 @@ static inline void bio_advance_iter_single(const struct bio *bio, bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); } +void __bio_advance(struct bio *, unsigned bytes); + +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @bytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +static inline void bio_advance(struct bio *bio, unsigned int nbytes) +{ + if (nbytes == bio->bi_iter.bi_size) { + bio->bi_iter.bi_size = 0; + return; + } + __bio_advance(bio, nbytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ @@ -265,37 +243,6 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) bio->bi_flags &= ~(1U << bit); } -static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) -{ - *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); -} - -static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) -{ - struct bvec_iter iter = bio->bi_iter; - int idx; - - bio_get_first_bvec(bio, bv); - if (bv->bv_len == bio->bi_iter.bi_size) - return; /* this bio only has a single bvec */ - - bio_advance_iter(bio, &iter, iter.bi_size); - - if (!iter.bi_bvec_done) - idx = iter.bi_idx - 1; - else /* in the middle of bvec */ - idx = iter.bi_idx; - - *bv = bio->bi_io_vec[idx]; - - /* - * iter.bi_bvec_done records actual length of the last bvec - * if this bio ends in the middle of one io vector - */ - if (iter.bi_bvec_done) - bv->bv_len = iter.bi_bvec_done; -} - static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); @@ -424,7 +371,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } -extern blk_qc_t submit_bio(struct bio *); +void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); @@ -456,8 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_advance(struct bio *, unsigned); - extern void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs); extern void bio_uninit(struct bio *); @@ -469,12 +414,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); int bio_add_zone_append_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off, bool *same_page); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_release_pages(struct bio *bio, bool mark_dirty); +void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); +void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -482,27 +426,16 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); -void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); void zero_fill_bio(struct bio *bio); -extern const char *bio_devname(struct bio *bio, char *buffer); +static inline void bio_release_pages(struct bio *bio, bool mark_dirty) +{ + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + __bio_release_pages(bio, mark_dirty); +} -#define bio_set_dev(bio, bdev) \ -do { \ - bio_clear_flag(bio, BIO_REMAPPED); \ - if ((bio)->bi_bdev != (bdev)) \ - bio_clear_flag(bio, BIO_THROTTLED); \ - (bio)->bi_bdev = (bdev); \ - bio_associate_blkg(bio); \ -} while (0) - -#define bio_copy_dev(dst, src) \ -do { \ - bio_clear_flag(dst, BIO_REMAPPED); \ - (dst)->bi_bdev = (src)->bi_bdev; \ - bio_clone_blkg_association(dst, src); \ -} while (0) +extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_dev(bio) \ disk_devt((bio)->bi_bdev->bd_disk) @@ -521,6 +454,22 @@ static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ +static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio_clear_flag(bio, BIO_REMAPPED); + if (bio->bi_bdev != bdev) + bio_clear_flag(bio, BIO_THROTTLED); + bio->bi_bdev = bdev; + bio_associate_blkg(bio); +} + +static inline void bio_copy_dev(struct bio *dst, struct bio *src) +{ + bio_clear_flag(dst, BIO_REMAPPED); + dst->bi_bdev = src->bi_bdev; + bio_clone_blkg_association(dst, src); +} + /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * @@ -784,7 +733,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { - bio->bi_opf |= REQ_HIPRI; + bio->bi_opf |= REQ_POLLED; if (!is_sync_kiocb(kiocb)) bio->bi_opf |= REQ_NOWAIT; } diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h new file mode 100644 index 000000000000..bbab65bd5428 --- /dev/null +++ b/include/linux/blk-crypto-profile.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_PROFILE_H +#define __LINUX_BLK_CRYPTO_PROFILE_H + +#include <linux/bio.h> +#include <linux/blk-crypto.h> + +struct blk_crypto_profile; + +/** + * struct blk_crypto_ll_ops - functions to control inline encryption hardware + * + * Low-level operations for controlling inline encryption hardware. This + * interface must be implemented by storage drivers that support inline + * encryption. All functions may sleep, are serialized by profile->lock, and + * are never called while profile->dev (if set) is runtime-suspended. + */ +struct blk_crypto_ll_ops { + + /** + * @keyslot_program: Program a key into the inline encryption hardware. + * + * Program @key into the specified @slot in the inline encryption + * hardware, overwriting any key that the keyslot may already contain. + * The keyslot is guaranteed to not be in-use by any I/O. + * + * This is required if the device has keyslots. Otherwise (i.e. if the + * device is a layered device, or if the device is real hardware that + * simply doesn't have the concept of keyslots) it is never called. + * + * Must return 0 on success, or -errno on failure. + */ + int (*keyslot_program)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); + + /** + * @keyslot_evict: Evict a key from the inline encryption hardware. + * + * If the device has keyslots, this function must evict the key from the + * specified @slot. The slot will contain @key, but there should be no + * need for the @key argument to be used as @slot should be sufficient. + * The keyslot is guaranteed to not be in-use by any I/O. + * + * If the device doesn't have keyslots itself, this function must evict + * @key from any underlying devices. @slot won't be valid in this case. + * + * If there are no keyslots and no underlying devices, this function + * isn't required. + * + * Must return 0 on success, or -errno on failure. + */ + int (*keyslot_evict)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); +}; + +/** + * struct blk_crypto_profile - inline encryption profile for a device + * + * This struct contains a storage device's inline encryption capabilities (e.g. + * the supported crypto algorithms), driver-provided functions to control the + * inline encryption hardware (e.g. programming and evicting keys), and optional + * device-independent keyslot management data. + */ +struct blk_crypto_profile { + + /* public: Drivers must initialize the following fields. */ + + /** + * @ll_ops: Driver-provided functions to control the inline encryption + * hardware, e.g. program and evict keys. + */ + struct blk_crypto_ll_ops ll_ops; + + /** + * @max_dun_bytes_supported: The maximum number of bytes supported for + * specifying the data unit number (DUN). Specifically, the range of + * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1. + */ + unsigned int max_dun_bytes_supported; + + /** + * @modes_supported: Array of bitmasks that specifies whether each + * combination of crypto mode and data unit size is supported. + * Specifically, the i'th bit of modes_supported[crypto_mode] is set if + * crypto_mode can be used with a data unit size of (1 << i). Note that + * only data unit sizes that are powers of 2 can be supported. + */ + unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX]; + + /** + * @dev: An optional device for runtime power management. If the driver + * provides this device, it will be runtime-resumed before any function + * in @ll_ops is called and will remain resumed during the call. + */ + struct device *dev; + + /* private: The following fields shouldn't be accessed by drivers. */ + + /* Number of keyslots, or 0 if not applicable */ + unsigned int num_slots; + + /* + * Serializes all calls to functions in @ll_ops as well as all changes + * to @slot_hashtable. This can also be taken in read mode to look up + * keyslots while ensuring that they can't be changed concurrently. + */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; + struct list_head idle_slots; + spinlock_t idle_slots_lock; + + /* + * Hash table which maps struct *blk_crypto_key to keyslots, so that we + * can find a key's keyslot in O(1) time rather than O(num_slots). + * Protected by 'lock'. + */ + struct hlist_head *slot_hashtable; + unsigned int log_slot_ht_size; + + /* Per-keyslot data */ + struct blk_crypto_keyslot *slots; +}; + +int blk_crypto_profile_init(struct blk_crypto_profile *profile, + unsigned int num_slots); + +int devm_blk_crypto_profile_init(struct device *dev, + struct blk_crypto_profile *profile, + unsigned int num_slots); + +unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); + +blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + struct blk_crypto_keyslot **slot_ptr); + +void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); + +bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, + const struct blk_crypto_config *cfg); + +int __blk_crypto_evict_key(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key); + +void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); + +void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); + +void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, + const struct blk_crypto_profile *child); + +bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target, + const struct blk_crypto_profile *reference); + +void blk_crypto_update_capabilities(struct blk_crypto_profile *dst, + const struct blk_crypto_profile *src); + +#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */ diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h new file mode 100644 index 000000000000..8a038ea0717e --- /dev/null +++ b/include/linux/blk-integrity.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BLK_INTEGRITY_H +#define _LINUX_BLK_INTEGRITY_H + +#include <linux/blk-mq.h> + +struct request; + +enum blk_integrity_flags { + BLK_INTEGRITY_VERIFY = 1 << 0, + BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, + BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, +}; + +struct blk_integrity_iter { + void *prot_buf; + void *data_buf; + sector_t seed; + unsigned int data_size; + unsigned short interval; + const char *disk_name; +}; + +typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); +typedef void (integrity_prepare_fn) (struct request *); +typedef void (integrity_complete_fn) (struct request *, unsigned int); + +struct blk_integrity_profile { + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; + integrity_prepare_fn *prepare_fn; + integrity_complete_fn *complete_fn; + const char *name; +}; + +#ifdef CONFIG_BLK_DEV_INTEGRITY +void blk_integrity_register(struct gendisk *, struct blk_integrity *); +void blk_integrity_unregister(struct gendisk *); +int blk_integrity_compare(struct gendisk *, struct gendisk *); +int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); + +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + struct blk_integrity *bi = &disk->queue->integrity; + + if (!bi->profile) + return NULL; + + return bi; +} + +static inline struct blk_integrity * +bdev_get_integrity(struct block_device *bdev) +{ + return blk_get_integrity(bdev->bd_disk); +} + +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return q->integrity.profile; +} + +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(const struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + +/** + * bio_integrity_intervals - Return number of integrity intervals for a bio + * @bi: blk_integrity profile for device + * @sectors: Size of the bio in 512-byte sectors + * + * Description: The block layer calculates everything in 512 byte + * sectors but integrity metadata is done in terms of the data integrity + * interval size of the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. + */ +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return sectors >> (bi->interval_exp - 9); +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; +} + +static inline bool blk_integrity_rq(struct request *rq) +{ + return rq->cmd_flags & REQ_INTEGRITY; +} + +/* + * Return the first bvec that contains integrity data. Only drivers that are + * limited to a single integrity segment should use this helper. + */ +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) + return NULL; + return rq->bio->bi_integrity->bip_vec; +} +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline int blk_rq_count_integrity_sg(struct request_queue *q, + struct bio *b) +{ + return 0; +} +static inline int blk_rq_map_integrity_sg(struct request_queue *q, + struct bio *b, + struct scatterlist *s) +{ + return 0; +} +static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) +{ + return NULL; +} +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + return NULL; +} +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return false; +} +static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) +{ + return 0; +} +static inline void blk_integrity_register(struct gendisk *d, + struct blk_integrity *b) +{ +} +static inline void blk_integrity_unregister(struct gendisk *d) +{ +} +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ +} +static inline unsigned short +queue_max_integrity_segments(const struct request_queue *q) +{ + return 0; +} + +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} +static inline int blk_integrity_rq(struct request *rq) +{ + return 0; +} + +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + return NULL; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 13ba1861e688..8682663e7368 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -6,10 +6,226 @@ #include <linux/sbitmap.h> #include <linux/srcu.h> #include <linux/lockdep.h> +#include <linux/scatterlist.h> +#include <linux/prefetch.h> struct blk_mq_tags; struct blk_flush_queue; +#define BLKDEV_MIN_RQ 4 +#define BLKDEV_DEFAULT_RQ 128 + +typedef void (rq_end_io_fn)(struct request *, blk_status_t); + +/* + * request flags */ +typedef __u32 __bitwise req_flags_t; + +/* drive already may have started this one */ +#define RQF_STARTED ((__force req_flags_t)(1 << 1)) +/* may not be passed by ioscheduler */ +#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) +/* request for flush sequence */ +#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) +/* merge of different types, fail separately */ +#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) +/* track inflight for MQ */ +#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) +/* don't call prep for this one */ +#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) +/* vaguely specified driver internal error. Ignored by the block layer */ +#define RQF_FAILED ((__force req_flags_t)(1 << 10)) +/* don't warn about errors */ +#define RQF_QUIET ((__force req_flags_t)(1 << 11)) +/* elevator private data attached */ +#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) +/* account into disk and partition IO statistics */ +#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) +/* runtime pm request */ +#define RQF_PM ((__force req_flags_t)(1 << 15)) +/* on IO scheduler merge hash */ +#define RQF_HASHED ((__force req_flags_t)(1 << 16)) +/* track IO completion time */ +#define RQF_STATS ((__force req_flags_t)(1 << 17)) +/* Look at ->special_vec for the actual data payload instead of the + bio chain. */ +#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) +/* The per-zone write lock is held for this request */ +#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) +/* already slept for hybrid poll */ +#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) +/* ->timeout has been called, don't expire again */ +#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) +/* queue has elevator attached */ +#define RQF_ELV ((__force req_flags_t)(1 << 22)) + +/* flags that prevent us from merging requests: */ +#define RQF_NOMERGE_FLAGS \ + (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) + +enum mq_rq_state { + MQ_RQ_IDLE = 0, + MQ_RQ_IN_FLIGHT = 1, + MQ_RQ_COMPLETE = 2, +}; + +/* + * Try to put the fields that are referenced together in the same cacheline. + * + * If you modify this structure, make sure to update blk_rq_init() and + * especially blk_mq_rq_ctx_init() to take care of the added fields. + */ +struct request { + struct request_queue *q; + struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; + + unsigned int cmd_flags; /* op and common flags */ + req_flags_t rq_flags; + + int tag; + int internal_tag; + + unsigned int timeout; + + /* the following two fields are internal, NEVER access directly */ + unsigned int __data_len; /* total data len */ + sector_t __sector; /* sector cursor */ + + struct bio *bio; + struct bio *biotail; + + union { + struct list_head queuelist; + struct request *rq_next; + }; + + struct gendisk *rq_disk; + struct block_device *part; +#ifdef CONFIG_BLK_RQ_ALLOC_TIME + /* Time that the first bio started allocating this request. */ + u64 alloc_time_ns; +#endif + /* Time that this request was allocated for this IO. */ + u64 start_time_ns; + /* Time that I/O was submitted to the device. */ + u64 io_start_time_ns; + +#ifdef CONFIG_BLK_WBT + unsigned short wbt_flags; +#endif + /* + * rq sectors used for blk stats. It has the same value + * with blk_rq_sectors(rq), except that it never be zeroed + * by completion. + */ + unsigned short stats_sectors; + + /* + * Number of scatter-gather DMA addr+len pairs after + * physical address coalescing is performed. + */ + unsigned short nr_phys_segments; + +#ifdef CONFIG_BLK_DEV_INTEGRITY + unsigned short nr_integrity_segments; +#endif + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *crypt_ctx; + struct blk_crypto_keyslot *crypt_keyslot; +#endif + + unsigned short write_hint; + unsigned short ioprio; + + enum mq_rq_state state; + refcount_t ref; + + unsigned long deadline; + + /* + * The hash is used inside the scheduler, and killed once the + * request reaches the dispatch list. The ipi_list is only used + * to queue the request for softirq completion, which is long + * after the request has been unhashed (and even removed from + * the dispatch list). + */ + union { + struct hlist_node hash; /* merge hash */ + struct llist_node ipi_list; + }; + + /* + * The rb_node is only used inside the io scheduler, requests + * are pruned when moved to the dispatch queue. So let the + * completion_data share space with the rb_node. + */ + union { + struct rb_node rb_node; /* sort/lookup */ + struct bio_vec special_vec; + void *completion_data; + int error_count; /* for legacy drivers, don't use */ + }; + + + /* + * Three pointers are available for the IO schedulers, if they need + * more they have to dynamically allocate it. Flush requests are + * never put on the IO scheduler. So let the flush fields share + * space with the elevator data. + */ + union { + struct { + struct io_cq *icq; + void *priv[2]; + } elv; + + struct { + unsigned int seq; + struct list_head list; + rq_end_io_fn *saved_end_io; + } flush; + }; + + union { + struct __call_single_data csd; + u64 fifo_time; + }; + + /* + * completion callback. + */ + rq_end_io_fn *end_io; + void *end_io_data; +}; + +#define req_op(req) \ + ((req)->cmd_flags & REQ_OP_MASK) + +static inline bool blk_rq_is_passthrough(struct request *rq) +{ + return blk_op_is_passthrough(req_op(rq)); +} + +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + +#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) + +#define rq_dma_dir(rq) \ + (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) + +enum blk_eh_timer_return { + BLK_EH_DONE, /* drivers has completed the command */ + BLK_EH_RESET_TIMER, /* reset timer and try again */ +}; + +#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ +#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ + /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device @@ -126,9 +342,6 @@ struct blk_mq_hw_ctx { unsigned long queued; /** @run: Number of dispatched requests. */ unsigned long run; -#define BLK_MQ_MAX_DISPATCH_ORDER 7 - /** @dispatched: Number of dispatch requests by queue. */ - unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; @@ -148,13 +361,6 @@ struct blk_mq_hw_ctx { /** @kobj: Kernel object for sysfs. */ struct kobject kobj; - /** @poll_considered: Count times blk_poll() was called. */ - unsigned long poll_considered; - /** @poll_invoked: Count how many requests blk_poll() polled. */ - unsigned long poll_invoked; - /** @poll_success: Count how many polled requests were completed. */ - unsigned long poll_success; - #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named @@ -232,13 +438,11 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. - * @active_queues_shared_sbitmap: - * number of active request queues per tag set. - * @__bitmap_tags: A shared tags sbitmap, used over all hctx's - * @__breserved_tags: - * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. + * @shared_tags: + * Shared set of tags. Has @nr_hw_queues elements. If set, + * shared by all @tags. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. @@ -255,12 +459,11 @@ struct blk_mq_tag_set { unsigned int timeout; unsigned int flags; void *driver_data; - atomic_t active_queues_shared_sbitmap; - struct sbitmap_queue __bitmap_tags; - struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; + struct blk_mq_tags *shared_tags; + struct mutex tag_list_lock; struct list_head tag_list; }; @@ -330,7 +533,7 @@ struct blk_mq_ops { /** * @poll: Called to poll for completion of a specific tag. */ - int (*poll)(struct blk_mq_hw_ctx *); + int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. @@ -364,11 +567,6 @@ struct blk_mq_ops { unsigned int); /** - * @initialize_rq_fn: Called from inside blk_get_request(). - */ - void (*initialize_rq_fn)(struct request *rq); - - /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ @@ -432,6 +630,8 @@ enum { ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) +#define BLK_MQ_NO_HCTX_IDX (-1U) + struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, struct lock_class_key *lkclass); #define blk_mq_alloc_disk(set, queuedata) \ @@ -451,8 +651,6 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, unsigned int set_flags); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); -void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); - void blk_mq_free_request(struct request *rq); bool blk_mq_queue_inflight(struct request_queue *q); @@ -471,7 +669,40 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, struct request *blk_mq_alloc_request_hctx(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx); -struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); + +/* + * Tag address space map. + */ +struct blk_mq_tags { + unsigned int nr_tags; + unsigned int nr_reserved_tags; + + atomic_t active_queues; + + struct sbitmap_queue bitmap_tags; + struct sbitmap_queue breserved_tags; + + struct request **rqs; + struct request **static_rqs; + struct list_head page_list; + + /* + * used to clear request reference in rqs[] before freeing one + * request pool + */ + spinlock_t lock; +}; + +static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, + unsigned int tag) +{ + if (tag < tags->nr_tags) { + prefetch(tags->rqs[tag]); + return tags->rqs[tag]; + } + + return NULL; +} enum { BLK_MQ_UNIQUE_TAG_BITS = 16, @@ -524,6 +755,35 @@ static inline void blk_mq_set_request_complete(struct request *rq) void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); +void blk_mq_end_request_batch(struct io_comp_batch *ib); + +/* + * Only need start/end time stamping if we have iostat or + * blk stats enabled, or using an IO scheduler. + */ +static inline bool blk_mq_need_time_stamp(struct request *rq) +{ + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); +} + +/* + * Batched completions only work when there is no I/O error and no special + * ->end_io handler. + */ +static inline bool blk_mq_add_to_batch(struct request *req, + struct io_comp_batch *iob, int ioerror, + void (*complete)(struct io_comp_batch *)) +{ + if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) + return false; + if (!iob->complete) + iob->complete = complete; + else if (iob->complete != complete) + return false; + iob->need_ts |= blk_mq_need_time_stamp(req); + rq_list_add(&iob->req_list, req); + return true; +} void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); @@ -605,16 +865,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) -static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag != -1) - return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); - - return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | - BLK_QC_T_INTERNAL; -} - static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) @@ -633,8 +883,265 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->rq_disk = bio->bi_bdev->bd_disk; } -blk_qc_t blk_mq_submit_bio(struct bio *bio); void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); +static inline bool rq_is_sync(struct request *rq) +{ + return op_is_sync(rq->cmd_flags); +} + +void blk_rq_init(struct request_queue *q, struct request *rq); +int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); +void blk_rq_unprep_clone(struct request *rq); +blk_status_t blk_insert_cloned_request(struct request_queue *q, + struct request *rq); + +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; + unsigned long offset; + int null_mapped; + int from_user; +}; + +int blk_rq_map_user(struct request_queue *, struct request *, + struct rq_map_data *, void __user *, unsigned long, gfp_t); +int blk_rq_map_user_iov(struct request_queue *, struct request *, + struct rq_map_data *, const struct iov_iter *, gfp_t); +int blk_rq_unmap_user(struct bio *); +int blk_rq_map_kern(struct request_queue *, struct request *, void *, + unsigned int, gfp_t); +int blk_rq_append_bio(struct request *rq, struct bio *bio); +void blk_execute_rq_nowait(struct gendisk *, struct request *, int, + rq_end_io_fn *); +blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, + int at_head); + +struct req_iterator { + struct bvec_iter iter; + struct bio *bio; +}; + +#define __rq_for_each_bio(_bio, rq) \ + if ((rq->bio)) \ + for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) + +#define rq_for_each_segment(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_segment(bvl, _iter.bio, _iter.iter) + +#define rq_for_each_bvec(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_bvec(bvl, _iter.bio, _iter.iter) + +#define rq_iter_last(bvec, _iter) \ + (_iter.bio->bi_next == NULL && \ + bio_iter_last(bvec, _iter.iter)) + +/* + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_err_bytes() : bytes left till the next error boundary + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_stats_sectors() : sectors of the entire request used for stats + */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->__sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->__data_len; +} + +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + if (!rq->bio) + return 0; + if (!bio_has_data(rq->bio)) /* dataless requests such as discard */ + return rq->bio->bi_iter.bi_size; + return bio_iovec(rq->bio).bv_len; +} + +unsigned int blk_rq_err_bytes(const struct request *rq); + +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return blk_rq_bytes(rq) >> SECTOR_SHIFT; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; +} + +static inline unsigned int blk_rq_stats_sectors(const struct request *rq) +{ + return rq->stats_sectors; +} + +/* + * Some commands like WRITE SAME have a payload or data transfer size which + * is different from the size of the request. Any driver that supports such + * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to + * calculate the data transfer size. + */ +static inline unsigned int blk_rq_payload_bytes(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec.bv_len; + return blk_rq_bytes(rq); +} + +/* + * Return the first full biovec in the request. The caller needs to check that + * there are any bvecs before calling this helper. + */ +static inline struct bio_vec req_bvec(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec; + return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); +} + +static inline unsigned int blk_rq_count_bios(struct request *rq) +{ + unsigned int nr_bios = 0; + struct bio *bio; + + __rq_for_each_bio(bio, rq) + nr_bios++; + + return nr_bios; +} + +void blk_steal_bios(struct bio_list *list, struct request *rq); + +/* + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + */ +bool blk_update_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes); +void blk_abort_request(struct request *); + +/* + * Number of physical segments as sent to the device. + * + * Normally this is the number of discontiguous data segments sent by the + * submitter. But for data-less command like discard we might have no + * actual data segments submitted, but the driver might have to add it's + * own special payload. In that case we still return 1 here so that this + * special payload will be mapped. + */ +static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return 1; + return rq->nr_phys_segments; +} + +/* + * Number of discard segments (or ranges) the driver needs to fill in. + * Each discard bio merged into a request is counted as one segment. + */ +static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) +{ + return max_t(unsigned short, rq->nr_phys_segments, 1); +} + +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *last_sg = NULL; + + return __blk_rq_map_sg(q, rq, sglist, &last_sg); +} +void blk_dump_rq_flags(struct request *, char *); + +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int blk_rq_zone_no(struct request *rq) +{ + return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); +} + +static inline unsigned int blk_rq_zone_is_seq(struct request *rq) +{ + return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); +} + +bool blk_req_needs_zone_write_lock(struct request *rq); +bool blk_req_zone_write_trylock(struct request *rq); +void __blk_req_zone_write_lock(struct request *rq); +void __blk_req_zone_write_unlock(struct request *rq); + +static inline void blk_req_zone_write_lock(struct request *rq) +{ + if (blk_req_needs_zone_write_lock(rq)) + __blk_req_zone_write_lock(rq); +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ + if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) + __blk_req_zone_write_unlock(rq); +} + +static inline bool blk_req_zone_is_write_locked(struct request *rq) +{ + return rq->q->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + if (!blk_req_needs_zone_write_lock(rq)) + return true; + return !blk_req_zone_is_write_locked(rq); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline bool blk_req_needs_zone_write_lock(struct request *rq) +{ + return false; +} + +static inline void blk_req_zone_write_lock(struct request *rq) +{ +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ +} +static inline bool blk_req_zone_is_write_locked(struct request *rq) +{ + return false; +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + return true; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" #endif +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +void rq_flush_dcache_pages(struct request *rq); +#else +static inline void rq_flush_dcache_pages(struct request *rq) +{ +} +#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ +#endif /* BLK_MQ_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index be622b5a21ed..fe065c394fff 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,8 +20,26 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +/* + * The basic unit of block I/O is a sector. It is used in a number of contexts + * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 + * bytes. Variables of type sector_t represent an offset or size that is a + * multiple of 512 bytes. Hence these two constants. + */ +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif +#ifndef SECTOR_SIZE +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#endif + +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + struct block_device { sector_t bd_start_sect; + sector_t bd_nr_sectors; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; bool bd_read_only; /* read-only policy */ @@ -38,6 +56,7 @@ struct block_device { u8 bd_partno; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; + struct request_queue * bd_queue; /* The counter of freeze processes */ int bd_fsfreeze_count; @@ -208,6 +227,9 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -227,8 +249,8 @@ struct bio { struct bvec_iter bi_iter; + blk_qc_t bi_cookie; bio_end_io_t *bi_end_io; - void *bi_private; #ifdef CONFIG_BLK_CGROUP /* @@ -384,7 +406,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_HIPRI, + __REQ_POLLED, /* caller polls for completion using bio_poll */ /* for driver use */ __REQ_DRV, @@ -409,7 +431,7 @@ enum req_flag_bits { #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_HIPRI (1ULL << __REQ_HIPRI) +#define REQ_POLLED (1ULL << __REQ_POLLED) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) @@ -431,8 +453,6 @@ enum stat_group { #define bio_op(bio) \ ((bio)->bi_opf & REQ_OP_MASK) -#define req_op(req) \ - ((req)->cmd_flags & REQ_OP_MASK) /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, @@ -497,31 +517,6 @@ static inline int op_stat_group(unsigned int op) return op_is_write(op); } -typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 -#define BLK_QC_T_INTERNAL (1U << 31) - -static inline bool blk_qc_t_valid(blk_qc_t cookie) -{ - return cookie != BLK_QC_T_NONE; -} - -static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) -{ - return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; -} - -static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) -{ - return cookie & ((1u << BLK_QC_T_SHIFT) - 1); -} - -static inline bool blk_qc_t_is_internal(blk_qc_t cookie) -{ - return (cookie & BLK_QC_T_INTERNAL) != 0; -} - struct blk_rq_stat { u64 mean; u64 min; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12b9dbcc980e..bd4370baccca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -3,8 +3,6 @@ #define _LINUX_BLKDEV_H #include <linux/sched.h> -#include <linux/sched/clock.h> -#include <linux/major.h> #include <linux/genhd.h> #include <linux/list.h> #include <linux/llist.h> @@ -12,17 +10,11 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/wait.h> -#include <linux/mempool.h> -#include <linux/pfn.h> #include <linux/bio.h> -#include <linux/stringify.h> #include <linux/gfp.h> -#include <linux/smp.h> #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> -#include <linux/scatterlist.h> #include <linux/blkzoned.h> -#include <linux/pm.h> #include <linux/sbitmap.h> struct module; @@ -33,14 +25,12 @@ struct request; struct sg_io_hdr; struct blkcg_gq; struct blk_flush_queue; +struct kiocb; struct pr_ops; struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; -struct blk_keyslot_manager; - -#define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 /* Default maximum */ +struct blk_crypto_profile; /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -54,186 +44,13 @@ struct blk_keyslot_manager; */ #define BLKCG_MAX_POLS 6 -typedef void (rq_end_io_fn)(struct request *, blk_status_t); - -/* - * request flags */ -typedef __u32 __bitwise req_flags_t; - -/* drive already may have started this one */ -#define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* may not be passed by ioscheduler */ -#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) -/* request for flush sequence */ -#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) -/* merge of different types, fail separately */ -#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) -/* don't call prep for this one */ -#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* vaguely specified driver internal error. Ignored by the block layer */ -#define RQF_FAILED ((__force req_flags_t)(1 << 10)) -/* don't warn about errors */ -#define RQF_QUIET ((__force req_flags_t)(1 << 11)) -/* elevator private data attached */ -#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) -/* account into disk and partition IO statistics */ -#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) -/* runtime pm request */ -#define RQF_PM ((__force req_flags_t)(1 << 15)) -/* on IO scheduler merge hash */ -#define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* track IO completion time */ -#define RQF_STATS ((__force req_flags_t)(1 << 17)) -/* Look at ->special_vec for the actual data payload instead of the - bio chain. */ -#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) -/* The per-zone write lock is held for this request */ -#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) -/* already slept for hybrid poll */ -#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) -/* ->timeout has been called, don't expire again */ -#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) - -/* flags that prevent us from merging requests: */ -#define RQF_NOMERGE_FLAGS \ - (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) - -/* - * Request state for blk-mq. - */ -enum mq_rq_state { - MQ_RQ_IDLE = 0, - MQ_RQ_IN_FLIGHT = 1, - MQ_RQ_COMPLETE = 2, -}; - -/* - * Try to put the fields that are referenced together in the same cacheline. - * - * If you modify this structure, make sure to update blk_rq_init() and - * especially blk_mq_rq_ctx_init() to take care of the added fields. - */ -struct request { - struct request_queue *q; - struct blk_mq_ctx *mq_ctx; - struct blk_mq_hw_ctx *mq_hctx; - - unsigned int cmd_flags; /* op and common flags */ - req_flags_t rq_flags; - - int tag; - int internal_tag; - - /* the following two fields are internal, NEVER access directly */ - unsigned int __data_len; /* total data len */ - sector_t __sector; /* sector cursor */ - - struct bio *bio; - struct bio *biotail; - - struct list_head queuelist; - - /* - * The hash is used inside the scheduler, and killed once the - * request reaches the dispatch list. The ipi_list is only used - * to queue the request for softirq completion, which is long - * after the request has been unhashed (and even removed from - * the dispatch list). - */ - union { - struct hlist_node hash; /* merge hash */ - struct llist_node ipi_list; - }; - - /* - * The rb_node is only used inside the io scheduler, requests - * are pruned when moved to the dispatch queue. So let the - * completion_data share space with the rb_node. - */ - union { - struct rb_node rb_node; /* sort/lookup */ - struct bio_vec special_vec; - void *completion_data; - int error_count; /* for legacy drivers, don't use */ - }; - - /* - * Three pointers are available for the IO schedulers, if they need - * more they have to dynamically allocate it. Flush requests are - * never put on the IO scheduler. So let the flush fields share - * space with the elevator data. - */ - union { - struct { - struct io_cq *icq; - void *priv[2]; - } elv; - - struct { - unsigned int seq; - struct list_head list; - rq_end_io_fn *saved_end_io; - } flush; - }; - - struct gendisk *rq_disk; - struct block_device *part; -#ifdef CONFIG_BLK_RQ_ALLOC_TIME - /* Time that the first bio started allocating this request. */ - u64 alloc_time_ns; -#endif - /* Time that this request was allocated for this IO. */ - u64 start_time_ns; - /* Time that I/O was submitted to the device. */ - u64 io_start_time_ns; - -#ifdef CONFIG_BLK_WBT - unsigned short wbt_flags; -#endif - /* - * rq sectors used for blk stats. It has the same value - * with blk_rq_sectors(rq), except that it never be zeroed - * by completion. - */ - unsigned short stats_sectors; - - /* - * Number of scatter-gather DMA addr+len pairs after - * physical address coalescing is performed. - */ - unsigned short nr_phys_segments; - -#if defined(CONFIG_BLK_DEV_INTEGRITY) - unsigned short nr_integrity_segments; -#endif - -#ifdef CONFIG_BLK_INLINE_ENCRYPTION - struct bio_crypt_ctx *crypt_ctx; - struct blk_ksm_keyslot *crypt_keyslot; -#endif - - unsigned short write_hint; - unsigned short ioprio; - - enum mq_rq_state state; - refcount_t ref; - - unsigned int timeout; - unsigned long deadline; - - union { - struct __call_single_data csd; - u64 fifo_time; - }; +static inline int blk_validate_block_size(unsigned int bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; - /* - * completion callback. - */ - rq_end_io_fn *end_io; - void *end_io_data; -}; + return 0; +} static inline bool blk_op_is_passthrough(unsigned int op) { @@ -241,35 +58,6 @@ static inline bool blk_op_is_passthrough(unsigned int op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } -static inline bool blk_rq_is_passthrough(struct request *rq) -{ - return blk_op_is_passthrough(req_op(rq)); -} - -static inline unsigned short req_get_ioprio(struct request *req) -{ - return req->ioprio; -} - -#include <linux/elevator.h> - -struct blk_queue_ctx; - -struct bio_vec; - -enum blk_eh_timer_return { - BLK_EH_DONE, /* drivers has completed the command */ - BLK_EH_RESET_TIMER, /* reset timer and try again */ -}; - -enum blk_queue_state { - Queue_down, - Queue_up, -}; - -#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ -#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ - /* * Zoned block device models (zoned limit). * @@ -370,6 +158,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, #endif /* CONFIG_BLK_DEV_ZONED */ +/* + * Independent access ranges: struct blk_independent_access_range describes + * a range of contiguous sectors that can be accessed using device command + * execution resources that are independent from the resources used for + * other access ranges. This is typically found with single-LUN multi-actuator + * HDDs where each access range is served by a different set of heads. + * The set of independent ranges supported by the device is defined using + * struct blk_independent_access_ranges. The independent ranges must not overlap + * and must include all sectors within the disk capacity (no sector holes + * allowed). + * For a device with multiple ranges, requests targeting sectors in different + * ranges can be executed in parallel. A request can straddle an access range + * boundary. + */ +struct blk_independent_access_range { + struct kobject kobj; + struct request_queue *queue; + sector_t sector; + sector_t nr_sectors; +}; + +struct blk_independent_access_ranges { + struct kobject kobj; + bool sysfs_registered; + unsigned int nr_ia_ranges; + struct blk_independent_access_range ia_range[]; +}; + struct request_queue { struct request *last_merge; struct elevator_queue *elevator; @@ -444,8 +260,7 @@ struct request_queue { unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION - /* Inline crypto capabilities */ - struct blk_keyslot_manager *ksm; + struct blk_crypto_profile *crypto_profile; #endif unsigned int rq_timeout; @@ -457,10 +272,9 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_sbitmap; + atomic_t nr_active_requests_shared_tags; - struct sbitmap_queue sched_bitmap_tags; - struct sbitmap_queue sched_breserved_tags; + struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -536,6 +350,8 @@ struct request_queue { */ struct mutex mq_freeze_lock; + int quiesce_depth; + struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set bio_split; @@ -549,10 +365,14 @@ struct request_queue { bool mq_sysfs_init_done; - size_t cmd_size; - #define BLK_MAX_WRITE_HINTS 5 u64 write_hints[BLK_MAX_WRITE_HINTS]; + + /* + * Independent sector access ranges. This is always NULL for + * devices that do not have multiple independent access ranges. + */ + struct blk_independent_access_ranges *ia_ranges; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ @@ -579,7 +399,6 @@ struct request_queue { #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ -#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ @@ -613,8 +432,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -#define blk_queue_scsi_passthrough(q) \ - test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME @@ -638,11 +455,6 @@ extern void blk_clear_pm_only(struct request_queue *q); #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) - -#define rq_dma_dir(rq) \ - (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) - #define dma_map_bvec(dev, bv, dir, attrs) \ dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) @@ -758,42 +570,6 @@ static inline unsigned int queue_max_active_zones(const struct request_queue *q) } #endif /* CONFIG_BLK_DEV_ZONED */ -static inline bool rq_is_sync(struct request *rq) -{ - return op_is_sync(rq->cmd_flags); -} - -static inline bool rq_mergeable(struct request *rq) -{ - if (blk_rq_is_passthrough(rq)) - return false; - - if (req_op(rq) == REQ_OP_FLUSH) - return false; - - if (req_op(rq) == REQ_OP_WRITE_ZEROES) - return false; - - if (req_op(rq) == REQ_OP_ZONE_APPEND) - return false; - - if (rq->cmd_flags & REQ_NOMERGE_FLAGS) - return false; - if (rq->rq_flags & RQF_NOMERGE_FLAGS) - return false; - - return true; -} - -static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) -{ - if (bio_page(a) == bio_page(b) && - bio_offset(a) == bio_offset(b)) - return true; - - return false; -} - static inline unsigned int blk_queue_depth(struct request_queue *q) { if (q->queue_depth) @@ -808,83 +584,20 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) #define BLK_MIN_SG_TIMEOUT (7 * HZ) -struct rq_map_data { - struct page **pages; - int page_order; - int nr_entries; - unsigned long offset; - int null_mapped; - int from_user; -}; - -struct req_iterator { - struct bvec_iter iter; - struct bio *bio; -}; - /* This should not be used directly - use rq_for_each_segment */ #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) -#define __rq_for_each_bio(_bio, rq) \ - if ((rq->bio)) \ - for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -#define rq_for_each_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_segment(bvl, _iter.bio, _iter.iter) - -#define rq_for_each_bvec(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_bvec(bvl, _iter.bio, _iter.iter) - -#define rq_iter_last(bvec, _iter) \ - (_iter.bio->bi_next == NULL && \ - bio_iter_last(bvec, _iter.iter)) - -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -#endif -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -extern void rq_flush_dcache_pages(struct request *rq); -#else -static inline void rq_flush_dcache_pages(struct request *rq) -{ -} -#endif extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -blk_qc_t submit_bio_noacct(struct bio *bio); -extern void blk_rq_init(struct request_queue *q, struct request *rq); -extern void blk_put_request(struct request *); -extern struct request *blk_get_request(struct request_queue *, unsigned int op, - blk_mq_req_flags_t flags); +void submit_bio_noacct(struct bio *bio); + extern int blk_lld_busy(struct request_queue *q); -extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data); -extern void blk_rq_unprep_clone(struct request *rq); -extern blk_status_t blk_insert_cloned_request(struct request_queue *q, - struct request *rq); -int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_queue_split(struct bio **); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern int blk_rq_map_user(struct request_queue *, struct request *, - struct rq_map_data *, void __user *, unsigned long, - gfp_t); -extern int blk_rq_unmap_user(struct bio *); -extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); -extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, const struct iov_iter *, - gfp_t); -extern void blk_execute_rq_nowait(struct gendisk *, - struct request *, int, rq_end_io_fn *); - -blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, - int at_head); /* Helper to convert REQ_OP_XXX to its string format XXX */ extern const char *blk_op_str(unsigned int op); @@ -892,69 +605,17 @@ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); +/* only poll the hardware once, don't continue until a completion was found */ +#define BLK_POLL_ONESHOT (1 << 0) +/* do not sleep to wait for the expected completion time */ +#define BLK_POLL_NOSLEEP (1 << 1) +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; /* this is never NULL */ -} - -/* - * The basic unit of block I/O is a sector. It is used in a number of contexts - * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 - * bytes. Variables of type sector_t represent an offset or size that is a - * multiple of 512 bytes. Hence these two constants. - */ -#ifndef SECTOR_SHIFT -#define SECTOR_SHIFT 9 -#endif -#ifndef SECTOR_SIZE -#define SECTOR_SIZE (1 << SECTOR_SHIFT) -#endif - -#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_MASK (PAGE_SECTORS - 1) - -/* - * blk_rq_pos() : the current sector - * blk_rq_bytes() : bytes left in the entire request - * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_err_bytes() : bytes left till the next error boundary - * blk_rq_sectors() : sectors left in the entire request - * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_stats_sectors() : sectors of the entire request used for stats - */ -static inline sector_t blk_rq_pos(const struct request *rq) -{ - return rq->__sector; -} - -static inline unsigned int blk_rq_bytes(const struct request *rq) -{ - return rq->__data_len; -} - -static inline int blk_rq_cur_bytes(const struct request *rq) -{ - return rq->bio ? bio_cur_bytes(rq->bio) : 0; -} - -extern unsigned int blk_rq_err_bytes(const struct request *rq); - -static inline unsigned int blk_rq_sectors(const struct request *rq) -{ - return blk_rq_bytes(rq) >> SECTOR_SHIFT; -} - -static inline unsigned int blk_rq_cur_sectors(const struct request *rq) -{ - return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; -} - -static inline unsigned int blk_rq_stats_sectors(const struct request *rq) -{ - return rq->stats_sectors; + return bdev->bd_queue; /* this is never NULL */ } #ifdef CONFIG_BLK_DEV_ZONED @@ -973,42 +634,8 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), bio->bi_iter.bi_sector); } - -static inline unsigned int blk_rq_zone_no(struct request *rq) -{ - return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); -} - -static inline unsigned int blk_rq_zone_is_seq(struct request *rq) -{ - return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); -} #endif /* CONFIG_BLK_DEV_ZONED */ -/* - * Some commands like WRITE SAME have a payload or data transfer size which - * is different from the size of the request. Any driver that supports such - * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to - * calculate the data transfer size. - */ -static inline unsigned int blk_rq_payload_bytes(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return rq->special_vec.bv_len; - return blk_rq_bytes(rq); -} - -/* - * Return the first full biovec in the request. The caller needs to check that - * there are any bvecs before calling this helper. - */ -static inline struct bio_vec req_bvec(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return rq->special_vec; - return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); -} - static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { @@ -1048,47 +675,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, return min(q->limits.max_sectors, chunk_sectors); } -static inline unsigned int blk_rq_get_max_sectors(struct request *rq, - sector_t offset) -{ - struct request_queue *q = rq->q; - - if (blk_rq_is_passthrough(rq)) - return q->limits.max_hw_sectors; - - if (!q->limits.chunk_sectors || - req_op(rq) == REQ_OP_DISCARD || - req_op(rq) == REQ_OP_SECURE_ERASE) - return blk_queue_get_max_sectors(q, req_op(rq)); - - return min(blk_max_size_offset(q, offset, 0), - blk_queue_get_max_sectors(q, req_op(rq))); -} - -static inline unsigned int blk_rq_count_bios(struct request *rq) -{ - unsigned int nr_bios = 0; - struct bio *bio; - - __rq_for_each_bio(bio, rq) - nr_bios++; - - return nr_bios; -} - -void blk_steal_bios(struct bio_list *list, struct request *rq); - -/* - * Request completion related functions. - * - * blk_update_request() completes given number of bytes and updates - * the request without completing it. - */ -extern bool blk_update_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); - -extern void blk_abort_request(struct request *); - /* * Access functions for manipulating queue properties */ @@ -1133,46 +719,24 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -extern void blk_queue_required_elevator_features(struct request_queue *q, - unsigned int features); -extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, - struct device *dev); -/* - * Number of physical segments as sent to the device. - * - * Normally this is the number of discontiguous data segments sent by the - * submitter. But for data-less command like discard we might have no - * actual data segments submitted, but the driver might have to add it's - * own special payload. In that case we still return 1 here so that this - * special payload will be mapped. - */ -static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return 1; - return rq->nr_phys_segments; -} +struct blk_independent_access_ranges * +disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); +void disk_set_independent_access_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *iars); /* - * Number of discard segments (or ranges) the driver needs to fill in. - * Each discard bio merged into a request is counted as one segment. + * Elevator features for blk_queue_required_elevator_features: */ -static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) -{ - return max_t(unsigned short, rq->nr_phys_segments, 1); -} - -int __blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist, struct scatterlist **last_sg); -static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) -{ - struct scatterlist *last_sg = NULL; +/* Supports zoned block devices sequential write constraint */ +#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) +/* Supports scheduling on multiple hardware queues */ +#define ELEVATOR_F_MQ_AWARE (1U << 1) - return __blk_rq_map_sg(q, rq, sglist, &last_sg); -} -extern void blk_dump_rq_flags(struct request *, char *); +extern void blk_queue_required_elevator_features(struct request_queue *q, + unsigned int features); +extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev); bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); @@ -1187,19 +751,24 @@ extern void blk_set_queue_dying(struct request_queue *); * as the lock contention for request_queue lock is reduced. * * It is ok not to disable preemption when adding the request to the plug list - * or when attempting a merge, because blk_schedule_flush_list() will only flush - * the plug list when the task sleeps by itself. For details, please see - * schedule() where blk_schedule_flush_plug() is called. + * or when attempting a merge. For details, please see schedule() where + * blk_flush_plug() is called. */ struct blk_plug { - struct list_head mq_list; /* blk-mq requests */ - struct list_head cb_list; /* md requires an unplug callback */ + struct request *mq_list; /* blk-mq requests */ + + /* if ios_left is > 1, we can batch tag/rq allocations */ + struct request *cached_rq; + unsigned short nr_ios; + unsigned short rq_count; + bool multiple_queues; + bool has_elevator; bool nowait; + + struct list_head cb_list; /* md requires an unplug callback */ }; -#define BLK_MAX_REQUEST_COUNT 16 -#define BLK_PLUG_FLUSH_SIZE (128 * 1024) struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); @@ -1211,32 +780,17 @@ struct blk_plug_cb { extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, int size); extern void blk_start_plug(struct blk_plug *); +extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *, bool); - -static inline void blk_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - if (plug) - blk_flush_plug_list(plug, false); -} - -static inline void blk_schedule_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - if (plug) - blk_flush_plug_list(plug, true); -} +void blk_flush_plug(struct blk_plug *plug, bool from_schedule); static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->mq_list) || - !list_empty(&plug->cb_list)); + (plug->mq_list || !list_empty(&plug->cb_list)); } int blkdev_issue_flush(struct block_device *bdev); @@ -1245,23 +799,23 @@ long nr_blockdev_pages(void); struct blk_plug { }; -static inline void blk_start_plug(struct blk_plug *plug) +static inline void blk_start_plug_nr_ios(struct blk_plug *plug, + unsigned short nr_ios) { } -static inline void blk_finish_plug(struct blk_plug *plug) +static inline void blk_start_plug(struct blk_plug *plug) { } -static inline void blk_flush_plug(struct task_struct *task) +static inline void blk_finish_plug(struct blk_plug *plug) { } -static inline void blk_schedule_flush_plug(struct task_struct *task) +static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } - static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; @@ -1499,22 +1053,6 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return offset << SECTOR_SHIFT; } -/* - * Two cases of handling DISCARD merge: - * If max_discard_segments > 1, the driver takes every bio - * as a range and send them to controller together. The ranges - * needn't to be contiguous. - * Otherwise, the bios/requests will be handled as same as - * others which should be contiguous. - */ -static inline bool blk_discard_mergable(struct request *req) -{ - if (req_op(req) == REQ_OP_DISCARD && - queue_max_discard_segments(req->q) > 1) - return true; - return false; -} - static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1628,210 +1166,36 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -enum blk_integrity_flags { - BLK_INTEGRITY_VERIFY = 1 << 0, - BLK_INTEGRITY_GENERATE = 1 << 1, - BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, - BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, -}; - -struct blk_integrity_iter { - void *prot_buf; - void *data_buf; - sector_t seed; - unsigned int data_size; - unsigned short interval; - const char *disk_name; -}; - -typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); -typedef void (integrity_prepare_fn) (struct request *); -typedef void (integrity_complete_fn) (struct request *, unsigned int); - -struct blk_integrity_profile { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - integrity_prepare_fn *prepare_fn; - integrity_complete_fn *complete_fn; - const char *name; -}; - -extern void blk_integrity_register(struct gendisk *, struct blk_integrity *); -extern void blk_integrity_unregister(struct gendisk *); -extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, - struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); - -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - if (!bi->profile) - return NULL; - - return bi; -} - -static inline -struct blk_integrity *bdev_get_integrity(struct block_device *bdev) -{ - return blk_get_integrity(bdev->bd_disk); -} - -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return q->integrity.profile; -} - -static inline bool blk_integrity_rq(struct request *rq) -{ - return rq->cmd_flags & REQ_INTEGRITY; -} - -static inline void blk_queue_max_integrity_segments(struct request_queue *q, - unsigned int segs) -{ - q->limits.max_integrity_segments = segs; -} - -static inline unsigned short -queue_max_integrity_segments(const struct request_queue *q) -{ - return q->limits.max_integrity_segments; -} - -/** - * bio_integrity_intervals - Return number of integrity intervals for a bio - * @bi: blk_integrity profile for device - * @sectors: Size of the bio in 512-byte sectors - * - * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the data integrity - * interval size of the storage device. Convert the block layer sectors - * to the appropriate number of integrity intervals. - */ -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return sectors >> (bi->interval_exp - 9); -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; -} - -/* - * Return the first bvec that contains integrity data. Only drivers that are - * limited to a single integrity segment should use this helper. - */ -static inline struct bio_vec *rq_integrity_vec(struct request *rq) -{ - if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) - return NULL; - return rq->bio->bi_integrity->bip_vec; -} - -#else /* CONFIG_BLK_DEV_INTEGRITY */ - -struct bio; -struct block_device; -struct gendisk; -struct blk_integrity; - -static inline int blk_integrity_rq(struct request *rq) -{ - return 0; -} -static inline int blk_rq_count_integrity_sg(struct request_queue *q, - struct bio *b) -{ - return 0; -} -static inline int blk_rq_map_integrity_sg(struct request_queue *q, - struct bio *b, - struct scatterlist *s) -{ - return 0; -} -static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) -{ - return NULL; -} -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) -{ - return NULL; -} -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return false; -} -static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) -{ - return 0; -} -static inline void blk_integrity_register(struct gendisk *d, - struct blk_integrity *b) -{ -} -static inline void blk_integrity_unregister(struct gendisk *d) -{ -} -static inline void blk_queue_max_integrity_segments(struct request_queue *q, - unsigned int segs) -{ -} -static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) -{ - return 0; -} - -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return 0; -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return 0; -} - -static inline struct bio_vec *rq_integrity_vec(struct request *rq) -{ - return NULL; -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_BLK_INLINE_ENCRYPTION -bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); +bool blk_crypto_register(struct blk_crypto_profile *profile, + struct request_queue *q); -void blk_ksm_unregister(struct request_queue *q); +void blk_crypto_unregister(struct request_queue *q); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ -static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, - struct request_queue *q) +static inline bool blk_crypto_register(struct blk_crypto_profile *profile, + struct request_queue *q) { return true; } -static inline void blk_ksm_unregister(struct request_queue *q) { } +static inline void blk_crypto_unregister(struct request_queue *q) { } #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +enum blk_unique_id { + /* these match the Designator Types specified in SPC */ + BLK_UID_T10 = 1, + BLK_UID_EUI64 = 2, + BLK_UID_NAA = 3, +}; + +#define NFL4_UFLG_MASK 0x0000003F struct block_device_operations { - blk_qc_t (*submit_bio) (struct bio *bio); + void (*submit_bio)(struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); @@ -1847,6 +1211,9 @@ struct block_device_operations { int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); char *(*devnode)(struct gendisk *disk, umode_t *mode); + /* returns the length of the identifier or a negative errno: */ + int (*get_unique_id)(struct gendisk *disk, u8 id[16], + enum blk_unique_id id_type); struct module *owner; const struct pr_ops *pr_ops; @@ -1869,60 +1236,6 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -#ifdef CONFIG_BLK_DEV_ZONED -bool blk_req_needs_zone_write_lock(struct request *rq); -bool blk_req_zone_write_trylock(struct request *rq); -void __blk_req_zone_write_lock(struct request *rq); -void __blk_req_zone_write_unlock(struct request *rq); - -static inline void blk_req_zone_write_lock(struct request *rq) -{ - if (blk_req_needs_zone_write_lock(rq)) - __blk_req_zone_write_lock(rq); -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ - if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) - __blk_req_zone_write_unlock(rq); -} - -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return rq->q->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - if (!blk_req_needs_zone_write_lock(rq)) - return true; - return !blk_req_zone_is_write_locked(rq); -} -#else -static inline bool blk_req_needs_zone_write_lock(struct request *rq) -{ - return false; -} - -static inline void blk_req_zone_write_lock(struct request *rq) -{ -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ -} -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return false; -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - return true; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline void blk_wake_io_task(struct task_struct *waiter) { /* @@ -1991,6 +1304,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); +int sync_blockdev_nowait(struct block_device *bdev); +void sync_bdevs(bool wait); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1999,10 +1314,54 @@ static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline int sync_blockdev_nowait(struct block_device *bdev) +{ + return 0; +} +static inline void sync_bdevs(bool wait) +{ +} #endif int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev); +struct io_comp_batch { + struct request *req_list; + bool need_ts; + void (*complete)(struct io_comp_batch *); +}; + +#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } + +#define rq_list_add(listptr, rq) do { \ + (rq)->rq_next = *(listptr); \ + *(listptr) = rq; \ +} while (0) + +#define rq_list_pop(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) { \ + __req = *(listptr); \ + *(listptr) = __req->rq_next; \ + } \ + __req; \ +}) + +#define rq_list_peek(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) \ + __req = *(listptr); \ + __req; \ +}) + +#define rq_list_for_each(listptr, pos) \ + for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \ + +#define rq_list_next(rq) (rq)->rq_next +#define rq_list_empty(list) ((list) == (struct request *) NULL) + #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index a083e15df608..22501a293fa5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -2,7 +2,7 @@ #ifndef BLKTRACE_H #define BLKTRACE_H -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/relay.h> #include <linux/compat.h> #include <uapi/linux/blktrace_api.h> diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 537e1b991f11..a4665c7ab07c 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -7,8 +7,18 @@ * Author: Masami Hiramatsu <mhiramat@kernel.org> */ +#ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +#else /* !__KERNEL__ */ +/* + * NOTE: This is only for tools/bootconfig, because tools/bootconfig will + * run the parser sanity test. + * This does NOT mean linux/bootconfig.h is available in the user space. + * However, if you change this file, please make sure the tools/bootconfig + * has no issue on building and running. + */ +#endif #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 @@ -25,10 +35,10 @@ * The checksum will be used with the BOOTCONFIG_MAGIC and the size for * embedding the bootconfig in the initrd image. */ -static inline __init u32 xbc_calc_checksum(void *data, u32 size) +static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size) { unsigned char *p = data; - u32 ret = 0; + uint32_t ret = 0; while (size--) ret += *p++; @@ -38,10 +48,10 @@ static inline __init u32 xbc_calc_checksum(void *data, u32 size) /* XBC tree node */ struct xbc_node { - u16 next; - u16 child; - u16 parent; - u16 data; + uint16_t next; + uint16_t child; + uint16_t parent; + uint16_t data; } __attribute__ ((__packed__)); #define XBC_KEY 0 @@ -271,13 +281,12 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf, const char **emsg, int *epos); +int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); +/* XBC node and size information */ +int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_destroy_all(void); - -/* Debug dump functions */ -void __init xbc_debug_dump(void); +void __init xbc_exit(void); #endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f4c16f19f83e..3db6f6c95489 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -578,11 +578,12 @@ struct btf_func_model { * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) - /* Store IP address of the caller on the trampoline stack, * so it's available for trampoline's programs. */ #define BPF_TRAMP_F_IP_ARG BIT(3) +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ +#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 @@ -928,8 +929,11 @@ struct bpf_array_aux { * stored in the map to make sure that all callers and callees have * the same prog type and JITed flag. */ - enum bpf_prog_type type; - bool jited; + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b98..bbe1eefa4c8a 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -#ifdef CONFIG_NET -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6486d3c19463..36f33685c8c0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -194,7 +194,7 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(int cpu); +void invalidate_bh_lrus_cpu(void); bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -408,7 +408,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } -static inline void invalidate_bh_lrus_cpu(int cpu) {} +static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 0e9bdd42dafb..35c25dff651a 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -44,7 +44,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -}; +} __packed; struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h new file mode 100644 index 000000000000..a075b70b9a70 --- /dev/null +++ b/include/linux/cc_platform.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#ifndef _LINUX_CC_PLATFORM_H +#define _LINUX_CC_PLATFORM_H + +#include <linux/types.h> +#include <linux/stddef.h> + +/** + * enum cc_attr - Confidential computing attributes + * + * These attributes represent confidential computing features that are + * currently active. + */ +enum cc_attr { + /** + * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active + * + * The platform/OS is running with active memory encryption. This + * includes running either as a bare-metal system or a hypervisor + * and actively using memory encryption or as a guest/virtual machine + * and actively using memory encryption. + * + * Examples include SME, SEV and SEV-ES. + */ + CC_ATTR_MEM_ENCRYPT, + + /** + * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active + * + * The platform/OS is running as a bare-metal system or a hypervisor + * and actively using memory encryption. + * + * Examples include SME. + */ + CC_ATTR_HOST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption. + * + * Examples include SEV and SEV-ES. + */ + CC_ATTR_GUEST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption and register state encryption. + * + * Examples include SEV-ES. + */ + CC_ATTR_GUEST_STATE_ENCRYPT, +}; + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + +/** + * cc_platform_has() - Checks if the specified cc_attr attribute is active + * @attr: Confidential computing attribute to check + * + * The cc_platform_has() function will return an indicator as to whether the + * specified Confidential Computing attribute is currently active. + * + * Context: Any context + * Return: + * * TRUE - Specified Confidential Computing attribute is active + * * FALSE - Specified Confidential Computing attribute is not active + */ +bool cc_platform_has(enum cc_attr attr); + +#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ + +static inline bool cc_platform_has(enum cc_attr attr) { return false; } + +#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ + +#endif /* _LINUX_CC_PLATFORM_H */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index c4fef00abdf3..0a89f111e00e 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -64,6 +64,7 @@ struct cdrom_device_info { int for_data; int (*exit)(struct cdrom_device_info *); int mrw_mode_page; + __s64 last_media_change_ms; }; struct cdrom_device_ops { diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e1c705fdfa7c..db2e147e069f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains * per-socket cgroup information except for memcg association. * - * On legacy hierarchies, net_prio and net_cls controllers directly set - * attributes on each sock which can then be tested by the network layer. - * On the default hierarchy, each sock is associated with the cgroup it was - * created in and the networking layer can match the cgroup directly. - * - * To avoid carrying all three cgroup related fields separately in sock, - * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. - * On boot, sock_cgroup_data records the cgroup that the sock was created - * in so that cgroup2 matches can be made; however, once either net_prio or - * net_cls starts being used, the area is overridden to carry prioidx and/or - * classid. The two modes are distinguished by whether the lowest bit is - * set. Clear bit indicates cgroup pointer while set bit prioidx and - * classid. - * - * While userland may start using net_prio or net_cls at any time, once - * either is used, cgroup2 matching no longer works. There is no reason to - * mix the two and this is in line with how legacy and v2 compatibility is - * handled. On mode switch, cgroup references which are already being - * pointed to by socks may be leaked. While this can be remedied by adding - * synchronization around sock_cgroup_data, given that the number of leaked - * cgroups is bound and highly unlikely to be high, this seems to be the - * better trade-off. + * On legacy hierarchies, net_prio and net_cls controllers directly + * set attributes on each sock which can then be tested by the network + * layer. On the default hierarchy, each sock is associated with the + * cgroup it was created in and the networking layer can match the + * cgroup directly. */ struct sock_cgroup_data { - union { -#ifdef __LITTLE_ENDIAN - struct { - u8 is_data : 1; - u8 no_refcnt : 1; - u8 unused : 6; - u8 padding; - u16 prioidx; - u32 classid; - } __packed; -#else - struct { - u32 classid; - u16 prioidx; - u8 padding; - u8 unused : 6; - u8 no_refcnt : 1; - u8 is_data : 1; - } __packed; + struct cgroup *cgroup; /* v2 */ +#ifdef CONFIG_CGROUP_NET_CLASSID + u32 classid; /* v1 */ +#endif +#ifdef CONFIG_CGROUP_NET_PRIO + u16 prioidx; /* v1 */ #endif - u64 val; - }; }; -/* - * There's a theoretical window where the following accessors race with - * updaters and return part of the previous pointer as the prioidx or - * classid. Such races are short-lived and the result isn't critical. - */ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { - /* fallback to 1 which is always the ID of the root cgroup */ - return (skcd->is_data & 1) ? skcd->prioidx : 1; +#ifdef CONFIG_CGROUP_NET_PRIO + return READ_ONCE(skcd->prioidx); +#else + return 1; +#endif } static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { - /* fallback to 0 which is the unconfigured default classid */ - return (skcd->is_data & 1) ? skcd->classid : 0; +#ifdef CONFIG_CGROUP_NET_CLASSID + return READ_ONCE(skcd->classid); +#else + return 0; +#endif } -/* - * If invoked concurrently, the updaters may clobber each other. The - * caller is responsible for synchronization. - */ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_prioidx(&skcd_buf) == prioidx) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.prioidx = prioidx; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_PRIO + WRITE_ONCE(skcd->prioidx, prioidx); +#endif } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_classid(&skcd_buf) == classid) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.classid = classid; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_CLASSID + WRITE_ONCE(skcd->classid, classid); +#endif } #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7bf60454a313..75c151413fda 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, */ #ifdef CONFIG_SOCK_CGROUP_DATA -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) -extern spinlock_t cgroup_sk_update_lock; -#endif - -void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) - unsigned long v; - - /* - * @skcd->val is 64bit but the following is safe on 32bit too as we - * just need the lower ulong to be written and read atomically. - */ - v = READ_ONCE(skcd->val); - - if (v & 3) - return &cgrp_dfl_root.cgrp; - - return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; -#else - return (struct cgroup *)(unsigned long)skcd->val; -#endif + return skcd->cgroup; } #else /* CONFIG_CGROUP_DATA */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 49b0ac8b6fd3..3c4de9b6c6e3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -62,19 +62,6 @@ #define __no_sanitize_coverage #endif -/* - * Not all versions of clang implement the type-generic versions - * of the builtin overflow checkers. Fortunately, clang implements - * __has_builtin allowing us to avoid awkward version - * checks. Unfortunately, we don't know which version of gcc clang - * pretends to be, so the macro may or may not be defined. - */ -#if __has_builtin(__builtin_mul_overflow) && \ - __has_builtin(__builtin_add_overflow) && \ - __has_builtin(__builtin_sub_overflow) -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 21c36b69eb06..7bbd8df02532 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -41,8 +41,6 @@ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif @@ -95,10 +93,8 @@ #if GCC_VERSION >= 70000 #define KASAN_ABI_VERSION 5 -#elif GCC_VERSION >= 50000 +#else #define KASAN_ABI_VERSION 4 -#elif GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 #endif #if __has_attribute(__no_sanitize_address__) @@ -125,8 +121,12 @@ #define __no_sanitize_coverage #endif -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +/* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel, + * matching the defines used by Clang. + */ +#ifdef __SANITIZE_HWADDRESS__ +#define __SANITIZE_ADDRESS__ #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b67261a1e3e9..3d5af56337bd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, (typeof(ptr)) (__ptr + (off)); }) #endif +#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) + #ifndef OPTIMIZER_HIDE_VAR /* Make the optimizer believe the variable can be manipulated arbitrarily. */ #define OPTIMIZER_HIDE_VAR(var) \ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 8f2106e9e5c1..e6ec63403965 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -21,29 +21,6 @@ */ /* - * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support gcc < 5, we implement __has_attribute - * by hand. - */ -#ifndef __has_attribute -# define __has_attribute(x) __GCC4_has_attribute_##x -# define __GCC4_has_attribute___assume_aligned__ 1 -# define __GCC4_has_attribute___copy__ 0 -# define __GCC4_has_attribute___designated_init__ 0 -# define __GCC4_has_attribute___error__ 1 -# define __GCC4_has_attribute___externally_visible__ 1 -# define __GCC4_has_attribute___no_caller_saved_registers__ 0 -# define __GCC4_has_attribute___noclone__ 1 -# define __GCC4_has_attribute___no_profile_instrument_function__ 0 -# define __GCC4_has_attribute___nonstring__ 0 -# define __GCC4_has_attribute___no_sanitize_address__ 1 -# define __GCC4_has_attribute___no_sanitize_undefined__ 1 -# define __GCC4_has_attribute___no_sanitize_coverage__ 0 -# define __GCC4_has_attribute___fallthrough__ 0 -# define __GCC4_has_attribute___warning__ 1 -#endif - -/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute */ #define __alias(symbol) __attribute__((__alias__(#symbol))) @@ -77,7 +54,6 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: only supported since gcc >= 4.9 * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index b6ff83a714ca..05ceb2e92b0e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -290,11 +290,6 @@ struct ftrace_likely_data { (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) -/* Compile time object size, -1 for unknown */ -#ifndef __compiletime_object_size -# define __compiletime_object_size(obj) -1 -#endif - #ifdef __OPTIMIZE__ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 4d7fced3a39f..7a14807c9d1a 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -105,7 +105,7 @@ static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } -static inline bool context_tracking_guest_enter(void) { return false; } +static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #endif /* !CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 832d8a74fa59..991911048857 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -72,6 +72,8 @@ enum cpuhp_state { CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, + /* Must be after CPUHP_MM_VMSTAT_DEAD */ + CPUHP_MM_DEMOTION_DEAD, CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, @@ -240,6 +242,8 @@ enum cpuhp_state { CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, + /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */ + CPUHP_AP_MM_DEMOTION_ONLINE, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, CPUHP_AP_DTPM_CPU_ONLINE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5d4d07a9e1ed..1e7399fc69c0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -996,14 +996,15 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied. + * Returns the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } /** @@ -1018,7 +1019,7 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } #if NR_CPUS <= BITS_PER_LONG diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3f49e65169c6..dbb409d77d4f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -47,8 +47,6 @@ extern int debug_locks_off(void); # define locking_selftest() do { } while (0) #endif -struct task_struct; - #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 114553b487ef..a7df155ea49b 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -576,9 +576,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *t); /* - * Table keyslot manager functions + * Table blk_crypto_profile functions */ -void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); +void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); /*----------------------------------------------------------------- * Macros. diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index e1ca2080a1ff..39fefb86780b 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -173,7 +173,7 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj, */ static inline bool __must_check dma_resv_trylock(struct dma_resv *obj) { - return ww_mutex_trylock(&obj->lock); + return ww_mutex_trylock(&obj->lock, NULL); } /** diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..8c3d45eca46b --- /dev/null +++ b/include/linux/dsa/mv88e6xxx.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright 2021 NXP + */ + +#ifndef _NET_DSA_TAG_MV88E6XXX_H +#define _NET_DSA_TAG_MV88E6XXX_H + +#include <linux/if_vlan.h> + +#define MV88E6XXX_VID_STANDALONE 0 +#define MV88E6XXX_VID_BRIDGED (VLAN_N_VID - 1) + +#endif diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index c6bc45ae5e03..8ae999f587c4 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -1,11 +1,32 @@ /* SPDX-License-Identifier: GPL-2.0 - * Copyright 2019-2021 NXP Semiconductors + * Copyright 2019-2021 NXP */ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include <linux/kthread.h> #include <linux/packing.h> +#include <linux/skbuff.h> + +struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ + u8 ptp_cmd; + u8 ts_id; +}; + +#define OCELOT_SKB_CB(skb) \ + ((struct ocelot_skb_cb *)((skb)->cb)) + +#define IFH_TAG_TYPE_C 0 +#define IFH_TAG_TYPE_S 1 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_DSCP 0x1 +#define IFH_REW_OP_ONE_STEP_PTP 0x2 +#define IFH_REW_OP_TWO_STEP_PTP 0x3 +#define IFH_REW_OP_ORIGIN_PTP 0x5 #define OCELOT_TAG_LEN 16 #define OCELOT_SHORT_PREFIX_LEN 4 @@ -140,6 +161,17 @@ * +------+------+------+------+------+------+------+------+ */ +struct felix_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + +struct felix_port { + void (*xmit_work_fn)(struct kthread_work *work); + struct kthread_worker *xmit_worker; +}; + static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) { packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); @@ -215,4 +247,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid) packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0); } +/* Determine the PTP REW_OP to use for injecting the given skb */ +static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) +{ + struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; + u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; + u32 rew_op = 0; + + if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { + rew_op = ptp_cmd; + rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; + } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { + rew_op = ptp_cmd; + } + + return rew_op; +} + #endif diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 171106202fe5..9e07079528a5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -48,6 +48,10 @@ struct sja1105_tagger_data { spinlock_t meta_lock; unsigned long state; u8 ts_id; + /* Used on SJA1110 where meta frames are generated only for + * 2-step TX timestamps + */ + struct sk_buff_head skb_txtstamp_queue; }; struct sja1105_skb_cb { @@ -69,42 +73,24 @@ struct sja1105_port { bool hwts_tx_en; }; -enum sja1110_meta_tstamp { - SJA1110_META_TSTAMP_TX = 0, - SJA1110_META_TSTAMP_RX = 1, -}; - -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) - -void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id, - enum sja1110_meta_tstamp dir, u64 tstamp); - -#else +/* Timestamps are in units of 8 ns clock ticks (equivalent to + * a fixed 125 MHz clock). + */ +#define SJA1105_TICK_NS 8 -static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, - u8 ts_id, enum sja1110_meta_tstamp dir, - u64 tstamp) +static inline s64 ns_to_sja1105_ticks(s64 ns) { + return ns / SJA1105_TICK_NS; } -#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */ - -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105) - -extern const struct dsa_switch_ops sja1105_switch_ops; - -static inline bool dsa_port_is_sja1105(struct dsa_port *dp) +static inline s64 sja1105_ticks_to_ns(s64 ticks) { - return dp->ds->ops == &sja1105_switch_ops; + return ticks * SJA1105_TICK_NS; } -#else - static inline bool dsa_port_is_sja1105(struct dsa_port *dp) { - return false; + return true; } -#endif - #endif /* _NET_DSA_SJA1105_H */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h deleted file mode 100644 index ef9ceead3db1..000000000000 --- a/include/linux/elevator.h +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_ELEVATOR_H -#define _LINUX_ELEVATOR_H - -#include <linux/percpu.h> -#include <linux/hashtable.h> - -#ifdef CONFIG_BLOCK - -struct io_cq; -struct elevator_type; -#ifdef CONFIG_BLK_DEBUG_FS -struct blk_mq_debugfs_attr; -#endif - -/* - * Return values from elevator merger - */ -enum elv_merge { - ELEVATOR_NO_MERGE = 0, - ELEVATOR_FRONT_MERGE = 1, - ELEVATOR_BACK_MERGE = 2, - ELEVATOR_DISCARD_MERGE = 3, -}; - -struct blk_mq_alloc_data; -struct blk_mq_hw_ctx; - -struct elevator_mq_ops { - int (*init_sched)(struct request_queue *, struct elevator_type *); - void (*exit_sched)(struct elevator_queue *); - int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); - void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); - void (*depth_updated)(struct blk_mq_hw_ctx *); - - bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); - int (*request_merge)(struct request_queue *q, struct request **, struct bio *); - void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); - void (*requests_merged)(struct request_queue *, struct request *, struct request *); - void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); - void (*prepare_request)(struct request *); - void (*finish_request)(struct request *); - void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); - struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); - bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct request *, u64); - void (*requeue_request)(struct request *); - struct request *(*former_request)(struct request_queue *, struct request *); - struct request *(*next_request)(struct request_queue *, struct request *); - void (*init_icq)(struct io_cq *); - void (*exit_icq)(struct io_cq *); -}; - -#define ELV_NAME_MAX (16) - -struct elv_fs_entry { - struct attribute attr; - ssize_t (*show)(struct elevator_queue *, char *); - ssize_t (*store)(struct elevator_queue *, const char *, size_t); -}; - -/* - * identifies an elevator type, such as AS or deadline - */ -struct elevator_type -{ - /* managed by elevator core */ - struct kmem_cache *icq_cache; - - /* fields provided by elevator implementation */ - struct elevator_mq_ops ops; - - size_t icq_size; /* see iocontext.h */ - size_t icq_align; /* ditto */ - struct elv_fs_entry *elevator_attrs; - const char *elevator_name; - const char *elevator_alias; - const unsigned int elevator_features; - struct module *elevator_owner; -#ifdef CONFIG_BLK_DEBUG_FS - const struct blk_mq_debugfs_attr *queue_debugfs_attrs; - const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; -#endif - - /* managed by elevator core */ - char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */ - struct list_head list; -}; - -#define ELV_HASH_BITS 6 - -void elv_rqhash_del(struct request_queue *q, struct request *rq); -void elv_rqhash_add(struct request_queue *q, struct request *rq); -void elv_rqhash_reposition(struct request_queue *q, struct request *rq); -struct request *elv_rqhash_find(struct request_queue *q, sector_t offset); - -/* - * each queue has an elevator_queue associated with it - */ -struct elevator_queue -{ - struct elevator_type *type; - void *elevator_data; - struct kobject kobj; - struct mutex sysfs_lock; - unsigned int registered:1; - DECLARE_HASHTABLE(hash, ELV_HASH_BITS); -}; - -/* - * block elevator interface - */ -extern enum elv_merge elv_merge(struct request_queue *, struct request **, - struct bio *); -extern void elv_merge_requests(struct request_queue *, struct request *, - struct request *); -extern void elv_merged_request(struct request_queue *, struct request *, - enum elv_merge); -extern bool elv_attempt_insert_merge(struct request_queue *, struct request *, - struct list_head *); -extern struct request *elv_former_request(struct request_queue *, struct request *); -extern struct request *elv_latter_request(struct request_queue *, struct request *); -void elevator_init_mq(struct request_queue *q); - -/* - * io scheduler registration - */ -extern int elv_register(struct elevator_type *); -extern void elv_unregister(struct elevator_type *); - -/* - * io scheduler sysfs switching - */ -extern ssize_t elv_iosched_show(struct request_queue *, char *); -extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); - -extern bool elv_bio_merge_ok(struct request *, struct bio *); -extern struct elevator_queue *elevator_alloc(struct request_queue *, - struct elevator_type *); - -/* - * Helper functions. - */ -extern struct request *elv_rb_former_request(struct request_queue *, struct request *); -extern struct request *elv_rb_latter_request(struct request_queue *, struct request *); - -/* - * rb support functions. - */ -extern void elv_rb_add(struct rb_root *, struct request *); -extern void elv_rb_del(struct rb_root *, struct request *); -extern struct request *elv_rb_find(struct rb_root *, sector_t); - -/* - * Insertion selection - */ -#define ELEVATOR_INSERT_FRONT 1 -#define ELEVATOR_INSERT_BACK 2 -#define ELEVATOR_INSERT_SORT 3 -#define ELEVATOR_INSERT_REQUEUE 4 -#define ELEVATOR_INSERT_FLUSH 5 -#define ELEVATOR_INSERT_SORT_MERGE 6 - -#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) -#define rb_entry_rq(node) rb_entry((node), struct request, rb_node) - -#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) - -/* - * Elevator features. - */ - -/* Supports zoned block devices sequential write constraint */ -#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) -/* Supports scheduling on multiple hardware queues */ -#define ELEVATOR_F_MQ_AWARE (1U << 1) - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 2aaa15779d50..957ebec35aad 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if defined(CONFIG_UM) || defined(CONFIG_IA64) +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 928c411bd509..c58d50451485 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src) */ static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr) { - ether_addr_copy(dev->dev_addr, addr); + __dev_addr_set(dev, addr, ETH_ALEN); } /** diff --git a/include/linux/filter.h b/include/linux/filter.h index 4a93c12543ee..b956eeb0f9a8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -586,8 +586,10 @@ struct bpf_prog { struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ /* Instructions for interpreter */ - struct sock_filter insns[0]; - struct bpf_insn insnsi[]; + union { + DECLARE_FLEX_ARRAY(struct sock_filter, insns); + DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); + }; }; struct sk_filter { @@ -1051,6 +1053,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index aec8f30ab200..07967a450eaa 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,12 @@ typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, void *header, void *data); typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context, dma_addr_t completed, void *data); + +union fw_iso_callback { + fw_iso_callback_t sc; + fw_iso_mc_callback_t mc; +}; + struct fw_iso_context { struct fw_card *card; int type; @@ -443,10 +449,7 @@ struct fw_iso_context { int speed; bool drop_overflow_headers; size_t header_size; - union { - fw_iso_callback_t sc; - fw_iso_mc_callback_t mc; - } callback; + union fw_iso_callback callback; void *callback_data; }; diff --git a/include/linux/firmware/imx/s4.h b/include/linux/firmware/imx/s4.h new file mode 100644 index 000000000000..9e34923ae1d6 --- /dev/null +++ b/include/linux/firmware/imx/s4.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2021 NXP + * + * Header file for the IPC implementation. + */ + +#ifndef _S4_IPC_H +#define _S4_IPC_H + +struct imx_s4_ipc; + +struct imx_s4_rpc_msg { + uint8_t ver; + uint8_t size; + uint8_t cmd; + uint8_t tag; +} __packed; + +#endif /* _S4_IPC_H */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 56b426fe020c..4c70a6e2141e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -123,6 +123,7 @@ enum pm_ioctl_id { IOCTL_READ_PGGS = 15, /* Set healthy bit value */ IOCTL_SET_BOOT_HEALTH_STATUS = 17, + IOCTL_OSPI_MUX_SELECT = 21, }; enum pm_query_id { @@ -351,6 +352,11 @@ enum zynqmp_pm_shutdown_subtype { ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2, }; +enum ospi_mux_select_type { + PM_OSPI_MUX_SEL_DMA = 0, + PM_OSPI_MUX_SEL_LINEAR = 1, +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID @@ -387,6 +393,7 @@ int zynqmp_pm_set_pll_frac_data(u32 clk_id, u32 data); int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data); int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value); int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type); +int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select); int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag); int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status); @@ -508,6 +515,11 @@ static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) return -ENODEV; } +static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select) +{ + return -ENODEV; +} + static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag) { diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index c12df59d3f5f..3e378b1fb0bc 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -83,9 +83,10 @@ struct fprop_local_percpu { int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); -void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, - int max_frac); +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr); +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr); void fprop_fraction_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, unsigned long *numerator, unsigned long *denominator); @@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) unsigned long flags; local_irq_save(flags); - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, 1); local_irq_restore(flags); } diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index c1be37437e77..a6cd6815f249 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,27 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __RENAME(x) __asm__(#x) + +void fortify_panic(const char *name) __noreturn __cold; +void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); +void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); +void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)"); + +#define __compiletime_strlen(p) \ +({ \ + unsigned char *__p = (unsigned char *)(p); \ + size_t __ret = (size_t)-1; \ + size_t __p_size = __builtin_object_size(p, 1); \ + if (__p_size != (size_t)-1) { \ + size_t __p_len = __p_size - 1; \ + if (__builtin_constant_p(__p[__p_len]) && \ + __p[__p_len] == '\0') \ + __ret = __builtin_strlen(__p); \ + } \ + __ret; \ +}) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); @@ -49,28 +70,38 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) return p; } -__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); + size_t p_len = __compiletime_strlen(p); + size_t ret; - /* Work around gcc excess stack consumption issue */ - if (p_size == (size_t)-1 || - (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0')) - return __underlying_strlen(p); - ret = strnlen(p, p_size); - if (p_size <= ret) + /* We can take compile-time actions when maxlen is const. */ + if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) { + /* If p is const, we can use its compile-time-known len. */ + if (maxlen >= p_size) + return p_len; + } + + /* Do not check characters beyond the end of p. */ + ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) fortify_panic(__func__); return ret; } -extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); -__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +/* defined after fortified strnlen to reuse it. */ +__FORTIFY_INLINE __kernel_size_t strlen(const char *p) { + __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); - __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); - if (p_size <= ret && maxlen != ret) + /* Give up if we don't know how large p is. */ + if (p_size == (size_t)-1) + return __underlying_strlen(p); + ret = strnlen(p, p_size); + if (p_size <= ret) fortify_panic(__func__); return ret; } @@ -79,24 +110,27 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { - size_t ret; size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); + size_t q_len; /* Full count of source string length. */ + size_t len; /* Count of characters going into destination. */ if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); - ret = strlen(q); - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - - if (__builtin_constant_p(len) && len >= p_size) + q_len = strlen(q); + len = (q_len >= size) ? size - 1 : q_len; + if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) { + /* Write size is always larger than destination. */ + if (len >= p_size) __write_overflow(); + } + if (size) { if (len >= p_size) fortify_panic(__func__); __underlying_memcpy(p, q, len); p[len] = '\0'; } - return ret; + return q_len; } /* defined after fortified strnlen to reuse it */ @@ -280,7 +314,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strcpy(p, q); size = strlen(q) + 1; - /* test here to use the more stringent object size */ + /* Compile-time check for const size overflow. */ + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + /* Run-time check for dynamic size overflow. */ if (p_size < size) fortify_panic(__func__); memcpy(p, q, size); diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd2..f3cfca5edc9a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -48,6 +48,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; +struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; @@ -329,16 +330,12 @@ struct kiocb { randomized_struct_fields_start loff_t ki_pos; - void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - union { - unsigned int ki_cookie; /* for ->iopoll */ - struct wait_page_queue *ki_waitq; /* for async buffered IO */ - }; - + struct wait_page_queue *ki_waitq; /* for async buffered IO */ randomized_struct_fields_end }; @@ -2075,7 +2072,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); @@ -2498,6 +2496,8 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); + static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index e912ed9141d9..91ea9477e9bd 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -118,9 +118,6 @@ struct fscrypt_operations { */ bool (*empty_dir)(struct inode *inode); - /* The filesystem's maximum ciphertext filename length, in bytes */ - unsigned int max_namelen; - /* * Check whether the filesystem's inode numbers and UUID are stable, * meaning that they will never be changed even by offline operations diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 832e65f06754..9999e29187de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,16 +30,26 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_FUNCTION_TRACER +struct ftrace_ops; +struct ftrace_regs; /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that * does. Or at least does enough to prevent any unwelcome side effects. + * + * Also define the function prototype that these architectures use + * to call the ftrace_ops_list_func(). */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); #else # define FTRACE_FORCE_LIST_FUNC 0 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +#endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING @@ -88,8 +98,6 @@ extern int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -struct ftrace_ops; - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_regs { @@ -316,7 +324,12 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long old_addr, unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); +int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); + #else +struct ftrace_ops; # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { @@ -346,6 +359,18 @@ static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; } +static inline int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS @@ -795,6 +820,15 @@ static inline bool is_ftrace_trampoline(unsigned long addr) } #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef ftrace_graph_func +#define ftrace_graph_func ftrace_stub +#define FTRACE_OPS_GRAPH_STUB FTRACE_OPS_FL_STUB +#else +#define FTRACE_OPS_GRAPH_STUB 0 +#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 59828516ebaf..9f4ad719bfe3 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -22,10 +22,15 @@ struct device; * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. * INITIALIZED: The hardware corresponding to fwnode has been initialized. + * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its + * driver needs its child devices to be bound with + * their respective drivers as soon as they are + * added. */ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_LINKS_ADDED BIT(0) +#define FWNODE_FLAG_NOT_DEVICE BIT(1) +#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) struct fwnode_handle { struct fwnode_handle *secondary; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c68d83c87f83..59eabbc3a36b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,12 +12,10 @@ #include <linux/types.h> #include <linux/kdev_t.h> -#include <linux/rcupdate.h> -#include <linux/slab.h> -#include <linux/percpu-refcount.h> #include <linux/uuid.h> #include <linux/blk_types.h> -#include <asm/local.h> +#include <linux/device.h> +#include <linux/xarray.h> extern const struct device_type disk_type; extern struct device_type part_type; @@ -26,14 +24,6 @@ extern struct class block_class; #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 -#include <linux/major.h> -#include <linux/device.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/workqueue.h> -#include <linux/xarray.h> - #define PARTITION_META_INFO_VOLNAMELTH 64 /* * Enough for the string representation of any kind of UUID plus NULL. @@ -149,6 +139,7 @@ struct gendisk { unsigned long state; #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 +#define GD_DEAD 2 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -222,6 +213,8 @@ static inline int add_disk(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); + void set_disk_ro(struct gendisk *disk, bool read_only); static inline int get_disk_ro(struct gendisk *disk) @@ -230,6 +223,11 @@ static inline int get_disk_ro(struct gendisk *disk) test_bit(GD_READ_ONLY, &disk->state); } +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); @@ -247,7 +245,12 @@ static inline sector_t get_start_sect(struct block_device *bdev) static inline sector_t bdev_nr_sectors(struct block_device *bdev) { - return i_size_read(bdev->bd_inode) >> 9; + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return bdev_nr_sectors(bdev) << SECTOR_SHIFT; } static inline sector_t get_capacity(struct gendisk *disk) @@ -255,6 +258,12 @@ static inline sector_t get_capacity(struct gendisk *disk) return bdev_nr_sectors(disk->part0); } +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); @@ -290,10 +299,6 @@ bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void set_capacity(struct gendisk *disk, sector_t size); -/* for drivers/char/raw.c: */ -int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); - #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f965a..3745efd21cf6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -520,15 +520,11 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask); unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, @@ -570,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) return __alloc_pages(gfp_mask, order, nid, NULL); } +static inline +struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +{ + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + + return __folio_alloc(gfp, order, nid, NULL); +} + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and @@ -586,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); +struct folio *folio_alloc(gfp_t gfp, unsigned order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage); @@ -596,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +{ + return __folio_alloc_node(gfp, order, numa_node_id()); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 4aa1031d3e4c..0a0b2b09b1b8 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + struct page *page = folio_page(folio, offset / PAGE_SIZE); + return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); @@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + return page_address(&folio->page) + offset; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return kmap_local_page(page); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b4c49f9cc379..27cdd715c5f9 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -97,6 +97,43 @@ static inline void kmap_flush_unused(void); static inline void *kmap_local_page(struct page *page); /** + * kmap_local_folio - Map a page in this folio for temporary usage + * @folio: The folio containing the page. + * @offset: The byte offset within the folio which identifies the page. + * + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation:: + * + * addr1 = kmap_local_folio(folio1, offset1); + * addr2 = kmap_local_folio(folio2, offset2); + * ... + * kunmap_local(addr2); + * kunmap_local(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_folio() can rely on this side effect. + * + * Context: Can be invoked from any context. + * Return: The virtual address of @offset. + */ +static inline void *kmap_local_folio(struct folio *folio, size_t offset); + +/** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f123e15d966e..f280f33ff223 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -251,15 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, } /** - * thp_head - Head page of a transparent huge page. - * @page: Any page (tail, head or regular) found in the page cache. - */ -static inline struct page *thp_head(struct page *page) -{ - return compound_head(page); -} - -/** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. */ @@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline struct page *thp_head(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return page; -} - static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 694264503119..ada3dd79cd08 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1143,7 +1143,7 @@ struct ieee80211_mgmt { __le16 auth_transaction; __le16 status_code; /* possibly followed by Challenge text */ - u8 variable[0]; + u8 variable[]; } __packed auth; struct { __le16 reason_code; @@ -1152,26 +1152,26 @@ struct ieee80211_mgmt { __le16 capab_info; __le16 listen_interval; /* followed by SSID and Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed assoc_req; struct { __le16 capab_info; __le16 status_code; __le16 aid; /* followed by Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed assoc_resp, reassoc_resp; struct { __le16 capab_info; __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed s1g_assoc_resp, s1g_reassoc_resp; struct { __le16 capab_info; __le16 listen_interval; u8 current_ap[ETH_ALEN]; /* followed by SSID and Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed reassoc_req; struct { __le16 reason_code; @@ -1182,11 +1182,11 @@ struct ieee80211_mgmt { __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params, TIM */ - u8 variable[0]; + u8 variable[]; } __packed beacon; struct { /* only variable items: SSID, Supported rates */ - u8 variable[0]; + DECLARE_FLEX_ARRAY(u8, variable); } __packed probe_req; struct { __le64 timestamp; @@ -1194,7 +1194,7 @@ struct ieee80211_mgmt { __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params */ - u8 variable[0]; + u8 variable[]; } __packed probe_resp; struct { u8 category; @@ -1203,16 +1203,16 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; u8 status_code; - u8 variable[0]; + u8 variable[]; } __packed wme_action; struct{ u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch; struct{ u8 action_code; struct ieee80211_ext_chansw_ie data; - u8 variable[0]; + u8 variable[]; } __packed ext_chan_switch; struct{ u8 action_code; @@ -1228,7 +1228,7 @@ struct ieee80211_mgmt { __le16 timeout; __le16 start_seq_num; /* followed by BA Extension */ - u8 variable[0]; + u8 variable[]; } __packed addba_req; struct{ u8 action_code; @@ -1244,11 +1244,11 @@ struct ieee80211_mgmt { } __packed delba; struct { u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed self_prot; struct{ u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed mesh_action; struct { u8 action; @@ -1292,7 +1292,7 @@ struct ieee80211_mgmt { u8 toa[6]; __le16 tod_error; __le16 toa_error; - u8 variable[0]; + u8 variable[]; } __packed ftm; struct { u8 action_code; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 24f8489583ca..63f4ea4dac9b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -313,8 +313,8 @@ int iomap_writepages(struct address_space *mapping, struct iomap_dio_ops { int (*end_io)(struct kiocb *iocb, ssize_t size, int error, unsigned flags); - blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, - loff_t file_offset); + void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset); }; /* @@ -337,7 +337,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags); ssize_t iomap_dio_complete(struct iomap_dio *dio); -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP struct file; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 52850a02a3d0..163831a087ef 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -335,4 +335,7 @@ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data); #define GET_DEVICE_ID_MAX_RETRY 5 +/* Helper function for computing the IPMB checksum of some data. */ +unsigned char ipmb_checksum(unsigned char *data, int size); + #endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index deec18b8944a..9277d21c2690 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -39,6 +39,59 @@ struct ipmi_smi; #define IPMI_WATCH_MASK_CHECK_COMMANDS (1 << 2) /* + * SMI messages + * + * When communicating with an SMI, messages come in two formats: + * + * * Normal (to a BMC over a BMC interface) + * + * * IPMB (over a IPMB to another MC) + * + * When normal, commands are sent using the format defined by a + * standard message over KCS (NetFn must be even): + * + * +-----------+-----+------+ + * | NetFn/LUN | Cmd | Data | + * +-----------+-----+------+ + * + * And responses, similarly, with an completion code added (NetFn must + * be odd): + * + * +-----------+-----+------+------+ + * | NetFn/LUN | Cmd | CC | Data | + * +-----------+-----+------+------+ + * + * With normal messages, only commands are sent and only responses are + * received. + * + * In IPMB mode, we are acting as an IPMB device. Commands will be in + * the following format (NetFn must be even): + * + * +-------------+------+-------------+-----+------+ + * | NetFn/rsLUN | Addr | rqSeq/rqLUN | Cmd | Data | + * +-------------+------+-------------+-----+------+ + * + * Responses will using the following format: + * + * +-------------+------+-------------+-----+------+------+ + * | NetFn/rqLUN | Addr | rqSeq/rsLUN | Cmd | CC | Data | + * +-------------+------+-------------+-----+------+------+ + * + * This is similar to the format defined in the IPMB manual section + * 2.11.1 with the checksums and the first address removed. Also, the + * address is always the remote address. + * + * IPMB messages can be commands and responses in both directions. + * Received commands are handled as received commands from the message + * queue. + */ + +enum ipmi_smi_msg_type { + IPMI_SMI_MSG_TYPE_NORMAL = 0, + IPMI_SMI_MSG_TYPE_IPMB_DIRECT +}; + +/* * Messages to/from the lower layer. The smi interface will take one * of these to send. After the send has occurred and a response has * been received, it will report this same data structure back up to @@ -54,6 +107,8 @@ struct ipmi_smi; struct ipmi_smi_msg { struct list_head link; + enum ipmi_smi_msg_type type; + long msgid; void *user_data; @@ -73,6 +128,10 @@ struct ipmi_smi_msg { struct ipmi_smi_handlers { struct module *owner; + /* Capabilities of the SMI. */ +#define IPMI_SMI_CAN_HANDLE_IPMB_DIRECT (1 << 0) + unsigned int flags; + /* * The low-level interface cannot start sending messages to * the upper layer until this function is called. This may diff --git a/include/linux/irq.h b/include/linux/irq.h index c8293c817646..848e1e12c5c6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -524,9 +524,10 @@ struct irq_chip { void (*irq_bus_lock)(struct irq_data *data); void (*irq_bus_sync_unlock)(struct irq_data *data); +#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); - +#endif void (*irq_suspend)(struct irq_data *data); void (*irq_resume)(struct irq_data *data); void (*irq_pm_shutdown)(struct irq_data *data); @@ -606,8 +607,10 @@ struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); +#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); extern void irq_cpu_offline(void); +#endif extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); @@ -1261,6 +1264,7 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); * top-level IRQ handler. */ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +asmlinkage void generic_handle_arch_irq(struct pt_regs *regs); #else #ifndef set_handle_irq #define set_handle_irq(handle_irq) \ diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index ec2a47a81e42..8cd11a223260 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,7 @@ #define _LINUX_IRQ_WORK_H #include <linux/smp_types.h> +#include <linux/rcuwait.h> /* * An entry can be in one of four states: @@ -16,11 +17,13 @@ struct irq_work { struct __call_single_node node; void (*func)(struct irq_work *); + struct rcuwait irqwait; }; #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ .node = { .u_flags = (_flags), }, \ .func = (_func), \ + .irqwait = __RCUWAIT_INITIALIZER(irqwait), \ } #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) @@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work) return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; } +static inline bool irq_work_is_hard(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ; +} + bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 67351aac65ef..3a091d0710ae 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -14,8 +14,15 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_irq.h> #include <linux/platform_device.h> +/* Undefined on purpose */ +extern of_irq_init_cb_t typecheck_irq_init_cb; + +#define typecheck_irq_init_cb(fn) \ + (__typecheck(typecheck_irq_init_cb, &fn) ? fn : fn) + /* * This macro must be used by the different irqchip drivers to declare * the association between their DT compatible string and their @@ -23,24 +30,27 @@ * * @name: name that must be unique across all IRQCHIP_DECLARE of the * same file. - * @compstr: compatible string of the irqchip driver + * @compat: compatible string of the irqchip driver * @fn: initialization function */ -#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) +#define IRQCHIP_DECLARE(name, compat, fn) \ + OF_DECLARE_2(irqchip, name, compat, typecheck_irq_init_cb(fn)) extern int platform_irqchip_probe(struct platform_device *pdev); #define IRQCHIP_PLATFORM_DRIVER_BEGIN(drv_name) \ static const struct of_device_id drv_name##_irqchip_match_table[] = { -#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, .data = fn }, +#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \ + .data = typecheck_irq_init_cb(fn), }, #define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ {}, \ }; \ MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ -static struct platform_driver drv_name##_driver = { \ - .probe = platform_irqchip_probe, \ +static struct platform_driver drv_name##_driver = { \ + .probe = IS_ENABLED(CONFIG_IRQCHIP) ? \ + platform_irqchip_probe : NULL, \ .driver = { \ .name = #drv_name, \ .owner = THIS_MODULE, \ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 59aea39785bf..93d270ca0c56 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -168,14 +168,7 @@ int generic_handle_irq(unsigned int irq); * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); - -#ifdef CONFIG_HANDLE_DOMAIN_IRQ -int handle_domain_irq(struct irq_domain *domain, - unsigned int hwirq, struct pt_regs *regs); - -int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, - struct pt_regs *regs); -#endif +int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif /* Test to see if a driver has successfully requested an irq */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 23e4ee523576..9ee238ad29ce 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -251,7 +251,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd874a1ee862..de5f5913374d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -89,7 +89,7 @@ static __always_inline bool kasan_enabled(void) return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return kasan_enabled(); } @@ -104,7 +104,7 @@ static inline bool kasan_enabled(void) return IS_ENABLED(CONFIG_KASAN); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return false; } @@ -125,6 +125,11 @@ static __always_inline void kasan_free_pages(struct page *page, #endif /* CONFIG_KASAN_HW_TAGS */ +static inline bool kasan_has_integrated_init(void) +{ + return kasan_hw_tags_enabled(); +} + #ifdef CONFIG_KASAN struct kasan_cache { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2776423a587e..e8696e4a45aa 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -111,8 +111,8 @@ static __always_inline void might_resched(void) #endif /* CONFIG_PREEMPT_* */ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -extern void ___might_sleep(const char *file, int line, int preempt_offset); -extern void __might_sleep(const char *file, int line, int preempt_offset); +extern void __might_resched(const char *file, int line, unsigned int offsets); +extern void __might_sleep(const char *file, int line); extern void __cant_sleep(const char *file, int line, int preempt_offset); extern void __cant_migrate(const char *file, int line); @@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line); * supposed to. */ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) /** * cant_sleep - annotation for functions that cannot sleep * @@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line); */ # define non_block_end() WARN_ON(current->non_block_count-- == 0) #else - static inline void ___might_sleep(const char *file, int line, - int preempt_offset) { } - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } + static inline void __might_resched(const char *file, int line, + unsigned int offsets) { } +static inline void __might_sleep(const char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) # define cant_migrate() do { } while (0) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 44ae1a7eb9e3..69ae6b278464 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64, enum cpu_usage_stat); extern void account_steal_time(u64); extern void account_idle_time(u64); +extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h deleted file mode 100644 index a27605e2f826..000000000000 --- a/include/linux/keyslot-manager.h +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2019 Google LLC - */ - -#ifndef __LINUX_KEYSLOT_MANAGER_H -#define __LINUX_KEYSLOT_MANAGER_H - -#include <linux/bio.h> -#include <linux/blk-crypto.h> - -struct blk_keyslot_manager; - -/** - * struct blk_ksm_ll_ops - functions to manage keyslots in hardware - * @keyslot_program: Program the specified key into the specified slot in the - * inline encryption hardware. - * @keyslot_evict: Evict key from the specified keyslot in the hardware. - * The key is provided so that e.g. dm layers can evict - * keys from the devices that they map over. - * Returns 0 on success, -errno otherwise. - * - * This structure should be provided by storage device drivers when they set up - * a keyslot manager - this structure holds the function ptrs that the keyslot - * manager will use to manipulate keyslots in the hardware. - */ -struct blk_ksm_ll_ops { - int (*keyslot_program)(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - unsigned int slot); - int (*keyslot_evict)(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - unsigned int slot); -}; - -struct blk_keyslot_manager { - /* - * The struct blk_ksm_ll_ops that this keyslot manager will use - * to perform operations like programming and evicting keys on the - * device - */ - struct blk_ksm_ll_ops ksm_ll_ops; - - /* - * The maximum number of bytes supported for specifying the data unit - * number. - */ - unsigned int max_dun_bytes_supported; - - /* - * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents - * whether a crypto mode and data unit size are supported. The i'th - * bit of crypto_mode_supported[crypto_mode] is set iff a data unit - * size of (1 << i) is supported. We only support data unit sizes - * that are powers of 2. - */ - unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; - - /* Device for runtime power management (NULL if none) */ - struct device *dev; - - /* Here onwards are *private* fields for internal keyslot manager use */ - - unsigned int num_slots; - - /* Protects programming and evicting keys from the device */ - struct rw_semaphore lock; - - /* List of idle slots, with least recently used slot at front */ - wait_queue_head_t idle_slots_wait_queue; - struct list_head idle_slots; - spinlock_t idle_slots_lock; - - /* - * Hash table which maps struct *blk_crypto_key to keyslots, so that we - * can find a key's keyslot in O(1) time rather than O(num_slots). - * Protected by 'lock'. - */ - struct hlist_head *slot_hashtable; - unsigned int log_slot_ht_size; - - /* Per-keyslot data */ - struct blk_ksm_keyslot *slots; -}; - -int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); - -int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, - unsigned int num_slots); - -blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - struct blk_ksm_keyslot **slot_ptr); - -unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); - -void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); - -bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, - const struct blk_crypto_config *cfg); - -int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key); - -void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); - -void blk_ksm_destroy(struct blk_keyslot_manager *ksm); - -void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, - const struct blk_keyslot_manager *child); - -void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm); - -bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, - struct blk_keyslot_manager *ksm_subset); - -void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, - struct blk_keyslot_manager *reference_ksm); - -#endif /* __LINUX_KEYSLOT_MANAGER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e4f3bfe08757..e974caf39d3e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -3,7 +3,6 @@ #define _LINUX_KPROBES_H /* * Kernel Probes (KProbes) - * include/linux/kprobes.h * * Copyright (C) IBM Corporation, 2002, 2004 * @@ -39,7 +38,7 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES */ #include <asm-generic/kprobes.h> typedef int kprobe_opcode_t; struct arch_specific_insn { @@ -105,25 +104,25 @@ struct kprobe { #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ -static inline int kprobe_gone(struct kprobe *p) +static inline bool kprobe_gone(struct kprobe *p) { return p->flags & KPROBE_FLAG_GONE; } /* Is this kprobe disabled ? */ -static inline int kprobe_disabled(struct kprobe *p) +static inline bool kprobe_disabled(struct kprobe *p) { return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE); } /* Is this kprobe really running optimized path ? */ -static inline int kprobe_optimized(struct kprobe *p) +static inline bool kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } /* Is this kprobe uses ftrace ? */ -static inline int kprobe_ftrace(struct kprobe *p) +static inline bool kprobe_ftrace(struct kprobe *p) { return p->flags & KPROBE_FLAG_FTRACE; } @@ -181,14 +180,6 @@ struct kprobe_blacklist_entry { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -/* - * For #ifdef avoidance: - */ -static inline int kprobes_built_in(void) -{ - return 1; -} - extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); @@ -197,15 +188,26 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr); + +void __kretprobe_trampoline(void); +/* + * Since some architecture uses structured function pointer, + * use dereference_function_descriptor() to get real function address. + */ +static nokprobe_inline void *kretprobe_trampoline_addr(void) +{ + return dereference_kernel_function_descriptor(__kretprobe_trampoline); +} + /* If the trampoline handler called from a kprobe, use this version */ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer); + void *frame_pointer); static nokprobe_inline unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer) + void *frame_pointer) { unsigned long ret; /* @@ -214,7 +216,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, * be running at this point. */ kprobe_busy_begin(); - ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer); + ret = __kretprobe_trampoline_handler(regs, frame_pointer); kprobe_busy_end(); return ret; @@ -228,7 +230,7 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance return READ_ONCE(ri->rph->rp); } -#else /* CONFIG_KRETPROBES */ +#else /* !CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -239,11 +241,15 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) } #endif /* CONFIG_KRETPROBES */ +/* Markers of '_kprobe_blacklist' section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + extern struct kretprobe_blackpoint kretprobe_blacklist[]; #ifdef CONFIG_KPROBES_SANITY_TEST extern int init_test_probes(void); -#else +#else /* !CONFIG_KPROBES_SANITY_TEST */ static inline int init_test_probes(void) { return 0; @@ -303,7 +309,7 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ #define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page" int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, unsigned long *value, char *type, char *sym); -#else /* __ARCH_WANT_KPROBES_INSN_SLOT */ +#else /* !__ARCH_WANT_KPROBES_INSN_SLOT */ #define DEFINE_INSN_CACHE_OPS(__name) \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ @@ -334,7 +340,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr); + kprobe_opcode_t *addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); @@ -345,18 +351,22 @@ extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); -#endif +#endif /* CONFIG_SYSCTL */ extern void wait_for_kprobe_optimizer(void); -#else +#else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ + #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); -#endif - -int arch_check_ftrace_location(struct kprobe *p); +#else +static inline int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + return -EINVAL; +} +#endif /* CONFIG_KPROBES_ON_FTRACE */ /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); @@ -364,7 +374,7 @@ struct kprobe *get_kprobe(void *addr); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) { - return (__this_cpu_read(current_kprobe)); + return __this_cpu_read(current_kprobe); } static inline void reset_current_kprobe(void) @@ -382,7 +392,6 @@ int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); -unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); @@ -410,10 +419,6 @@ int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); #else /* !CONFIG_KPROBES: */ -static inline int kprobes_built_in(void) -{ - return 0; -} static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { return 0; @@ -428,11 +433,11 @@ static inline struct kprobe *kprobe_running(void) } static inline int register_kprobe(struct kprobe *p) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kprobes(struct kprobe **kps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kprobe(struct kprobe *p) { @@ -442,11 +447,11 @@ static inline void unregister_kprobes(struct kprobe **kps, int num) } static inline int register_kretprobe(struct kretprobe *rp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kretprobes(struct kretprobe **rps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kretprobe(struct kretprobe *rp) { @@ -462,11 +467,11 @@ static inline void kprobe_free_init_mem(void) } static inline int disable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int enable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline bool within_kprobe_blacklist(unsigned long addr) @@ -479,6 +484,7 @@ static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } #endif /* CONFIG_KPROBES */ + static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); @@ -493,19 +499,42 @@ static inline bool is_kprobe_insn_slot(unsigned long addr) { return false; } -#endif +#endif /* !CONFIG_KPROBES */ + #ifndef CONFIG_OPTPROBES static inline bool is_kprobe_optinsn_slot(unsigned long addr) { return false; } +#endif /* !CONFIG_OPTPROBES */ + +#ifdef CONFIG_KRETPROBES +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return (void *)addr == kretprobe_trampoline_addr(); +} + +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur); +#else +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return false; +} + +static nokprobe_inline +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + return 0; +} #endif /* Returns true if kprobes handled the fault */ static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, unsigned int trap) { - if (!kprobes_built_in()) + if (!IS_ENABLED(CONFIG_KPROBES)) return false; if (user_mode(regs)) return false; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 161e8164abcf..a38a5bca1ba5 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); -void ksm_migrate_page(struct page *newpage, struct page *oldpage); +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page, { } -static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) +static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 041ca7f15ea4..0f18df7fe874 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -608,7 +608,6 @@ struct kvm { unsigned long mmu_notifier_range_start; unsigned long mmu_notifier_range_end; #endif - long tlbs_dirty; struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; @@ -721,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) -{ - return vcpu->vcpu_idx; -} - #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + slots->used_slots; memslot++) \ diff --git a/include/linux/leds.h b/include/linux/leds.h index a0b730be40ad..ba4861ec73d3 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -360,7 +360,7 @@ struct led_trigger { struct led_hw_trigger_type *trigger_type; /* LEDs under control by this trigger (for simple triggers) */ - rwlock_t leddev_list_lock; + spinlock_t leddev_list_lock; struct list_head led_cdevs; /* Link to next registered trigger */ diff --git a/include/linux/libata.h b/include/linux/libata.h index c0c64f03e107..236ec689056a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -676,6 +676,18 @@ struct ata_ering { struct ata_ering_entry ring[ATA_ERING_SIZE]; }; +struct ata_cpr { + u8 num; + u8 num_storage_elements; + u64 start_lba; + u64 num_lbas; +}; + +struct ata_cpr_log { + u8 nr_cpr; + struct ata_cpr cpr[]; +}; + struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ @@ -735,6 +747,9 @@ struct ata_device { u32 zac_zones_optimal_nonseq; u32 zac_zones_max_open; + /* Concurrent positioning ranges */ + struct ata_cpr_log *cpr_log; + /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9fe165beb0f9..467b94257105 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -481,23 +481,6 @@ do { \ #endif /* CONFIG_LOCK_STAT */ -#ifdef CONFIG_LOCKDEP - -/* - * On lockdep we dont want the hand-coded irq-enable of - * _raw_*_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ - LOCK_CONTENDED((_lock), (try), (lock)) - -#else /* CONFIG_LOCKDEP */ - -#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ - lockfl((_lock), (flags)) - -#endif /* CONFIG_LOCKDEP */ - #ifdef CONFIG_PROVE_LOCKING extern void print_irqtrace_events(struct task_struct *curr); #else diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 3e726ace5c62..d22430840b53 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -21,7 +21,7 @@ enum lockdep_wait_type { LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ #ifdef CONFIG_PROVE_RAW_LOCK_NESTING - LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ + LD_WAIT_CONFIG, /* preemptible in PREEMPT_RT, spinlock_t etc.. */ #else LD_WAIT_CONFIG = LD_WAIT_SPIN, #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2adeea44c0d5..a9ac70ae01ab 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -26,13 +26,13 @@ * #undef LSM_HOOK * }; */ -LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) -LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, - struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) +LSM_HOOK(int, 0, binder_transaction, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, + const struct cred *to, struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) @@ -83,7 +83,8 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, - int mode, const struct qstr *name, void **ctx, u32 *ctxlen) + int mode, const struct qstr *name, const char **xattr_name, + void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -402,3 +403,8 @@ LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ + +#ifdef CONFIG_IO_URING +LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) +LSM_HOOK(int, 0, uring_sqpoll, void) +#endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..0bada4df23fc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -196,6 +196,9 @@ * @dentry dentry to use in calculating the context. * @mode mode used to determine resource type. * @name name of the last path component used to create file + * @xattr_name pointer to place the pointer to security xattr name. + * Caller does not have to free the resulting pointer. Its + * a pointer to static string. * @ctx pointer to place the pointer to the resulting context in. * @ctxlen point to place the length of the resulting context. * @dentry_create_files_as: @@ -1313,22 +1316,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1557,6 +1560,19 @@ * Read perf_event security info if allowed. * @perf_event_write: * Write perf_event security info if allowed. + * + * Security hooks for io_uring + * + * @uring_override_creds: + * Check if the current task, executing an io_uring operation, is allowed + * to override it's credentials with @new. + * + * @new: the new creds to use + * + * @uring_sqpoll: + * Check whether the current task is allowed to spawn a io_uring polling + * thread (IORING_SETUP_SQPOLL). + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ffb787d5ebde..5e6dc38f418e 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -80,6 +80,9 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); + + /* Quiesces the device on system shutdown, turns off interrupts etc */ + void (*shutdown)(struct mdio_device *mdiodev); }; static inline struct mdio_driver * diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 5c4a18a91f89..ae4526389261 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -16,10 +16,6 @@ #include <asm/mem_encrypt.h> -#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */ - -static inline bool mem_encrypt_active(void) { return false; } - #endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ #ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b066024c62e3..34de69b3b8ba 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); +void memblock_free_ptr(void *ptr, size_t size); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3096c9a0ee01..e34bf0cbdf55 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -369,7 +369,7 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) -static inline bool PageMemcgKmem(struct page *page); +static inline bool folio_memcg_kmem(struct folio *folio); /* * After the initialization objcg->memcg is always pointing at @@ -384,89 +384,95 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) } /* - * __page_memcg - get the memory cgroup associated with a non-kmem page - * @page: a pointer to the page struct + * __folio_memcg - Get the memory cgroup associated with a non-kmem folio + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * kmem pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * kmem folios. */ -static inline struct mem_cgroup *__page_memcg(struct page *page) +static inline struct mem_cgroup *__folio_memcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * __page_objcg - get the object cgroup associated with a kmem page - * @page: a pointer to the page struct + * __folio_objcg - get the object cgroup associated with a kmem folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the object cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the object cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * LRU pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * LRU folios. */ -static inline struct obj_cgroup *__page_objcg(struct page *page) +static inline struct obj_cgroup *__folio_objcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * page_memcg - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation * - lock_page_memcg() * - exclusive reference * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. */ +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return obj_cgroup_memcg(__folio_objcg(folio)); + return __folio_memcg(folio); +} + static inline struct mem_cgroup *page_memcg(struct page *page) { - if (PageMemcgKmem(page)) - return obj_cgroup_memcg(__page_objcg(page)); - else - return __page_memcg(page); + return folio_memcg(page_folio(page)); } -/* - * page_memcg_rcu - locklessly get the memory cgroup associated with a page - * @page: a pointer to the page struct +/** + * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. + * + * Return: A pointer to the memory cgroup associated with the folio, + * or NULL. */ -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); - VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { @@ -523,17 +529,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) #ifdef CONFIG_MEMCG_KMEM /* - * PageMemcgKmem - check if the page has MemcgKmem flag set - * @page: a pointer to the page struct + * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. + * @folio: Pointer to the folio. * - * Checks if the page has MemcgKmem flag set. The caller must ensure that - * the page has an associated memory cgroup. It's not safe to call this function - * against some types of pages, e.g. slab pages. + * Checks if the folio has MemcgKmem flag set. The caller must ensure + * that the folio has an associated memory cgroup. It's not safe to call + * this function against some types of folios, e.g. slab folios. */ -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { - VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); - return page->memcg_data & MEMCG_DATA_KMEM; + VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + return folio->memcg_data & MEMCG_DATA_KMEM; } /* @@ -577,7 +584,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #else -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { return false; } @@ -593,6 +600,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #endif +static inline bool PageMemcgKmem(struct page *page) +{ + return folio_memcg_kmem(page_folio(page)); +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); @@ -684,26 +696,47 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); + +/** + * mem_cgroup_charge - Charge a newly allocated folio to a cgroup. + * @folio: Folio to charge. + * @mm: mm context of the allocating task. + * @gfp: Reclaim mode. + * + * Try to charge @folio to the memcg that @mm belongs to, reclaiming + * pages according to @gfp if necessary. If @mm is NULL, try to + * charge to the active memcg. + * + * Do not use this for folios allocated for swapin. + * + * Return: 0 on success. Otherwise, an error code is returned. + */ +static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, + gfp_t gfp) { if (mem_cgroup_disabled()) return 0; - return __mem_cgroup_charge(page, mm, gfp_mask); + return __mem_cgroup_charge(folio, mm, gfp); } int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void __mem_cgroup_uncharge(struct page *page); -static inline void mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct folio *folio); + +/** + * mem_cgroup_uncharge - Uncharge a folio. + * @folio: Folio to uncharge. + * + * Uncharge a folio previously charged with mem_cgroup_charge(). + */ +static inline void mem_cgroup_uncharge(struct folio *folio) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge(page); + __mem_cgroup_uncharge(folio); } void __mem_cgroup_uncharge_list(struct list_head *page_list); @@ -714,7 +747,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) __mem_cgroup_uncharge_list(page_list); } -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct folio *old, struct folio *new); /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node @@ -753,33 +786,33 @@ out: } /** - * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page - * @page: the page + * folio_lruvec - return lruvec for isolating/putting an LRU folio + * @folio: Pointer to the folio. * - * This function relies on page->mem_cgroup being stable. + * This function relies on folio->mem_cgroup being stable. */ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); - VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); - return mem_cgroup_lruvec(memcg, pgdat); + VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio); + return mem_cgroup_lruvec(memcg, folio_pgdat(folio)); } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); -struct lruvec *lock_page_lruvec(struct page *page); -struct lruvec *lock_page_lruvec_irq(struct page *page); -struct lruvec *lock_page_lruvec_irqsave(struct page *page, +struct lruvec *folio_lruvec_lock(struct folio *folio); +struct lruvec *folio_lruvec_lock_irq(struct folio *folio); +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); #else -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } #endif @@ -947,6 +980,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif +void folio_memcg_lock(struct folio *folio); +void folio_memcg_unlock(struct folio *folio); void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); @@ -1115,12 +1150,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; } -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; @@ -1131,6 +1171,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool folio_memcg_kmem(struct folio *folio) +{ + return false; +} + static inline bool PageMemcgKmem(struct page *page) { return false; @@ -1179,8 +1224,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) return false; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct folio *folio, + struct mm_struct *mm, gfp_t gfp) { return 0; } @@ -1195,7 +1240,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) { } -static inline void mem_cgroup_uncharge(struct page *page) +static inline void mem_cgroup_uncharge(struct folio *folio) { } @@ -1203,7 +1248,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } @@ -1213,14 +1258,14 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - + struct pglist_data *pgdat = folio_pgdat(folio); return &pgdat->__lruvec; } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } @@ -1250,26 +1295,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } -static inline struct lruvec *lock_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flagsp) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; @@ -1356,6 +1401,14 @@ static inline void unlock_page_memcg(struct page *page) { } +static inline void folio_memcg_lock(struct folio *folio) +{ +} + +static inline void folio_memcg_unlock(struct folio *folio) +{ +} + static inline void mem_cgroup_handle_over_high(void) { } @@ -1517,38 +1570,39 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, } /* Test requires a stable page->memcg binding, see page_memcg() */ -static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +static inline bool folio_matches_lruvec(struct folio *folio, + struct lruvec *lruvec) { - return lruvec_pgdat(lruvec) == page_pgdat(page) && - lruvec_memcg(lruvec) == page_memcg(page); + return lruvec_pgdat(lruvec) == folio_pgdat(folio) && + lruvec_memcg(lruvec) == folio_memcg(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irq(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, struct lruvec *locked_lruvec) { if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); } - return lock_page_lruvec_irq(page); + return folio_lruvec_lock_irq(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, struct lruvec *locked_lruvec, unsigned long *flags) { if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); } - return lock_page_lruvec_irqsave(page, flags); + return folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -1558,17 +1612,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback); -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { if (mem_cgroup_disabled()) return; - if (unlikely(&page_memcg(page)->css != wb->memcg_css)) - mem_cgroup_track_foreign_dirty_slowpath(page, wb); + if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); @@ -1588,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, { } -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { } diff --git a/include/linux/memory.h b/include/linux/memory.h index 7efc0a7c14c9..182c606adb06 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #else -#define hotplug_memory_notifier(fn, pri) ({ 0; }) +static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) +{ + return 0; +} /* These aren't inline functions due to a GCC bug. */ #define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 326250996b4e..0d2aeb9b0f66 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,11 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/* + * Keep sync with: + * - macro MIGRATE_REASON in include/trace/events/migrate.h + * - migrate_reason_names[MR_TYPES] in mm/debug.c + */ enum migrate_reason { MR_COMPACTION, MR_MEMORY_FAILURE, @@ -32,7 +37,6 @@ enum migrate_reason { MR_TYPES }; -/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION @@ -53,6 +57,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void folio_migrate_flags(struct folio *newfolio, struct folio *folio); +void folio_migrate_copy(struct folio *newfolio, struct folio *folio); +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count); #else static inline void putback_movable_pages(struct list_head *l) {} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23417424373..f17d2101af7a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); -bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3638d09ba77..993204a6c1a1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f..40ff114aaf9e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,10 +36,7 @@ struct mempolicy; struct anon_vma; struct anon_vma_chain; -struct file_ra_state; struct user_struct; -struct writeback_control; -struct bdi_writeback; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -216,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -/* - * Any attempt to mark this function as static leads to build failure - * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() - * is referred to by BPF code. This must be visible for error injection. - */ -int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp, void **shadowp); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -748,13 +738,18 @@ static inline int put_page_testzero(struct page *page) return page_ref_dec_and_test(page); } +static inline int folio_put_testzero(struct folio *folio) +{ + return put_page_testzero(&folio->page); +} + /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. * This can be called when MMU is off so it must not access * any of the virtual mappings. */ -static inline int get_page_unless_zero(struct page *page) +static inline bool get_page_unless_zero(struct page *page) { return page_ref_add_unless(page, 1, 0); } @@ -907,7 +902,7 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -void copy_huge_page(struct page *dst, struct page *src); +void folio_copy(struct folio *dst, struct folio *src); /* * Compound pages have a destructor function. Provide a @@ -950,6 +945,20 @@ static inline unsigned int compound_order(struct page *page) return page[1].compound_order; } +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. + */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + static inline bool hpage_pincount_available(struct page *page) { /* @@ -1131,6 +1140,11 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +static inline enum zone_type folio_zonenum(const struct folio *folio) +{ + return page_zonenum(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { @@ -1200,18 +1214,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } /* 127: arbitrary random number, small enough to assemble well */ -#define page_ref_zero_or_close_to_overflow(page) \ - ((unsigned int) page_ref_count(page) + 127u <= 127u) +#define folio_ref_zero_or_close_to_overflow(folio) \ + ((unsigned int) folio_ref_count(folio) + 127u <= 127u) + +/** + * folio_get - Increment the reference count on a folio. + * @folio: The folio. + * + * Context: May be called in any context, as long as you know that + * you have a refcount on the folio. If you do not already have one, + * folio_try_get() may be the right interface for you to use. + */ +static inline void folio_get(struct folio *folio) +{ + VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); + folio_ref_inc(folio); +} static inline void get_page(struct page *page) { - page = compound_head(page); - /* - * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_refcount. - */ - VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); - page_ref_inc(page); + folio_get(page_folio(page)); } bool __must_check try_grab_page(struct page *page, unsigned int flags); @@ -1228,9 +1250,28 @@ static inline __must_check bool try_get_page(struct page *page) return true; } +/** + * folio_put - Decrement the reference count on a folio. + * @folio: The folio. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put() unless you can be sure that it wasn't the + * last reference. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put(struct folio *folio) +{ + if (folio_put_testzero(folio)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { - page = compound_head(page); + struct folio *folio = page_folio(page); /* * For devmap managed pages we need to catch refcount transition from @@ -1238,13 +1279,12 @@ static inline void put_page(struct page *page) * need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. */ - if (page_is_devmap_managed(page)) { - put_devmap_managed_page(page); + if (page_is_devmap_managed(&folio->page)) { + put_devmap_managed_page(&folio->page); return; } - if (put_page_testzero(page)) - __put_page(page); + folio_put(folio); } /* @@ -1379,6 +1419,11 @@ static inline int page_to_nid(const struct page *page) } #endif +static inline int folio_nid(const struct folio *folio) +{ + return page_to_nid(&folio->page); +} + #ifdef CONFIG_NUMA_BALANCING static inline int cpu_pid_to_cpupid(int cpu, int pid) { @@ -1546,6 +1591,16 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } +static inline struct zone *folio_zone(const struct folio *folio) +{ + return page_zone(&folio->page); +} + +static inline pg_data_t *folio_pgdat(const struct folio *folio) +{ + return page_pgdat(&folio->page); +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { @@ -1559,6 +1614,20 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/** + * folio_pfn - Return the Page Frame Number of a folio. + * @folio: The folio. + * + * A folio may contain multiple pages. The pages have consecutive + * Page Frame Numbers. + * + * Return: The Page Frame Number of the first page in the folio. + */ +static inline unsigned long folio_pfn(struct folio *folio) +{ + return page_to_pfn(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) @@ -1595,6 +1664,89 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +/** + * folio_nr_pages - The number of pages in the folio. + * @folio: The folio. + * + * Return: A positive power of two. + */ +static inline long folio_nr_pages(struct folio *folio) +{ + return compound_nr(&folio->page); +} + +/** + * folio_next - Move to the next physical folio. + * @folio: The folio we're currently operating on. + * + * If you have physically contiguous memory which may span more than + * one folio (eg a &struct bio_vec), use this function to move from one + * folio to the next. Do not use it if the memory is only virtually + * contiguous as the folios are almost certainly not adjacent to each + * other. This is the folio equivalent to writing ``page++``. + * + * Context: We assume that the folios are refcounted and/or locked at a + * higher level and do not adjust the reference counts. + * Return: The next struct folio. + */ +static inline struct folio *folio_next(struct folio *folio) +{ + return (struct folio *)folio_page(folio, folio_nr_pages(folio)); +} + +/** + * folio_shift - The size of the memory described by this folio. + * @folio: The folio. + * + * A folio represents a number of bytes which is a power-of-two in size. + * This function tells you which power-of-two the folio is. See also + * folio_size() and folio_order(). + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The base-2 logarithm of the size of this folio. + */ +static inline unsigned int folio_shift(struct folio *folio) +{ + return PAGE_SHIFT + folio_order(folio); +} + +/** + * folio_size - The number of bytes in a folio. + * @folio: The folio. + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The number of bytes in this folio. + */ +static inline size_t folio_size(struct folio *folio) +{ + return PAGE_SIZE << folio_order(folio); +} + +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif + +#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE +static inline int arch_make_folio_accessible(struct folio *folio) +{ + int ret; + long i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + ret = arch_make_page_accessible(folio_page(folio, i)); + if (ret) + break; + } + + return ret; +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ @@ -1635,19 +1787,6 @@ void page_address_init(void); extern void *page_rmapping(struct page *page); extern struct anon_vma *page_anon_vma(struct page *page); -extern struct address_space *page_mapping(struct page *page); - -extern struct address_space *__page_file_mapping(struct page *); - -static inline -struct address_space *page_file_mapping(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return __page_file_mapping(page); - - return page->mapping; -} - extern pgoff_t __page_file_index(struct page *page); /* @@ -1662,7 +1801,7 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); -struct address_space *page_mapping(struct page *page); +bool folio_mapped(struct folio *folio); /* * Return true only if the page has been allocated with @@ -1700,6 +1839,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) #define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) +#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in @@ -1854,20 +1994,9 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); -void account_page_cleaned(struct page *page, struct address_space *mapping, - struct bdi_writeback *wb); -int set_page_dirty(struct page *page); +bool folio_mark_dirty(struct folio *folio); +bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -void __cancel_dirty_page(struct page *page); -static inline void cancel_dirty_page(struct page *page) -{ - /* Avoid atomic ops, locking, etc. when not actually needed. */ - if (PageDirty(page)) - __cancel_dirty_page(page); -} -int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); @@ -2659,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -/* mm/page-writeback.c */ -int __must_check write_one_page(struct page *page); -void task_dirty_inc(struct task_struct *tsk); - extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355ea1ee32bd..e2ec68b0515c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,27 +6,33 @@ #include <linux/swap.h> /** - * page_is_file_lru - should the page be on a file LRU or anon LRU? - * @page: the page to test - * - * Returns 1 if @page is a regular filesystem backed page cache page or a lazily - * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal - * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by - * functions that manipulate the LRU lists, to sort a page onto the right LRU - * list. + * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? + * @folio: The folio to test. * * We would like to get this info without a page flag, but the state - * needs to survive until the page is last deleted from the LRU, which + * needs to survive until the folio is last deleted from the LRU, which * could be as far down as __page_cache_release. + * + * Return: An integer (not a boolean!) used to sort a folio onto the + * right LRU list and to account folios correctly. + * 1 if @folio is a regular filesystem backed page cache folio + * or a lazily freed anonymous folio (e.g. via MADV_FREE). + * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise + * ram or swap backed folio. */ +static inline int folio_is_file_lru(struct folio *folio) +{ + return !folio_test_swapbacked(folio); +} + static inline int page_is_file_lru(struct page *page) { - return !PageSwapBacked(page); + return folio_is_file_lru(page_folio(page)); } static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, - int nr_pages) + long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, } /** - * __clear_page_lru_flags - clear page lru flags before releasing a page - * @page: the page that was on lru and now has a zero reference + * __folio_clear_lru_flags - Clear page lru flags before releasing a page. + * @folio: The folio that was on lru and now has a zero reference. */ -static __always_inline void __clear_page_lru_flags(struct page *page) +static __always_inline void __folio_clear_lru_flags(struct folio *folio) { - VM_BUG_ON_PAGE(!PageLRU(page), page); + VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio); - __ClearPageLRU(page); + __folio_clear_lru(folio); /* this shouldn't happen, so leave the flags to bad_page() */ - if (PageActive(page) && PageUnevictable(page)) + if (folio_test_active(folio) && folio_test_unevictable(folio)) return; - __ClearPageActive(page); - __ClearPageUnevictable(page); + __folio_clear_active(folio); + __folio_clear_unevictable(folio); +} + +static __always_inline void __clear_page_lru_flags(struct page *page) +{ + __folio_clear_lru_flags(page_folio(page)); } /** - * page_lru - which LRU list should a page be on? - * @page: the page to test + * folio_lru_list - Which LRU list should a folio be on? + * @folio: The folio to test. * - * Returns the LRU list a page should be on, as an index + * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list page_lru(struct page *page) +static __always_inline enum lru_list folio_lru_list(struct folio *folio) { enum lru_list lru; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - if (PageUnevictable(page)) + if (folio_test_unevictable(folio)) return LRU_UNEVICTABLE; - lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; - if (PageActive(page)) + lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (folio_test_active(folio)) lru += LRU_ACTIVE; return lru; } +static __always_inline +void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add(&folio->lru, &lruvec->lists[lru]); +} + static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add_tail(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio_tail(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add_tail(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) +{ + list_del(&folio->lru); + update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + -folio_nr_pages(folio)); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec) { - list_del(&page->lru); - update_lru_size(lruvec, page_lru(page), page_zonenum(page), - -thp_nr_pages(page)); + lruvec_del_folio(lruvec, page_folio(page)); } #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f8ee09c711f..f92686cf2cdb 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> +#include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> @@ -239,6 +240,72 @@ struct page { #endif } _struct_page_alignment; +/** + * struct folio - Represents a contiguous set of bytes. + * @flags: Identical to the page flags. + * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mapping: The file this page belongs to, or refers to the anon_vma for + * anonymous memory. + * @index: Offset within the file, in units of pages. For anonymous memory, + * this is the index from the beginning of the mmap. + * @private: Filesystem per-folio data (see folio_attach_private()). + * Used for swp_entry_t if folio_test_swapcache(). + * @_mapcount: Do not access this member directly. Use folio_mapcount() to + * find out how many times this folio is mapped by userspace. + * @_refcount: Do not access this member directly. Use folio_ref_count() + * to find how many references there are to this folio. + * @memcg_data: Memory Control Group data. + * + * A folio is a physically, virtually and logically contiguous set + * of bytes. It is a power-of-two in size, and it is aligned to that + * same power-of-two. It is at least as large as %PAGE_SIZE. If it is + * in the page cache, it is at a file offset which is a multiple of that + * power-of-two. It may be mapped into userspace at an address which is + * at an arbitrary page offset, but its kernel virtual address is aligned + * to its size. + */ +struct folio { + /* private: don't document the anon union */ + union { + struct { + /* public: */ + unsigned long flags; + struct list_head lru; + struct address_space *mapping; + pgoff_t index; + void *private; + atomic_t _mapcount; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif + /* private: the union with struct page is transitional */ + }; + struct page page; + }; +}; + +static_assert(sizeof(struct page) == sizeof(struct folio)); +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) +FOLIO_MATCH(flags, flags); +FOLIO_MATCH(lru, lru); +FOLIO_MATCH(compound_head, lru); +FOLIO_MATCH(index, index); +FOLIO_MATCH(private, private); +FOLIO_MATCH(_mapcount, _mapcount); +FOLIO_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +FOLIO_MATCH(memcg_data, memcg_data); +#endif +#undef FOLIO_MATCH + +static inline atomic_t *folio_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->compound_mapcount; +} + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; @@ -257,6 +324,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +/* + * page_private can be used on tail pages. However, PagePrivate is only + * checked by the VM on the head page. So page_private on the tail pages + * should be used for data that's ancillary to the head page (eg attaching + * buffer heads to tail pages after attaching buffer heads to the head page) + */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) @@ -264,6 +337,11 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } +static inline void *folio_get_private(struct folio *folio) +{ + return folio->private; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) @@ -572,6 +650,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT + struct rcu_head delayed_drop; +#endif #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index b179f1e3541a..96e113e23d04 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } -static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) -{ - if (mmap_read_trylock(mm)) { - rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); - return true; - } - return false; -} - static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0c0c9a0fdf57..7afb57cab00b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -15,7 +15,7 @@ #include <linux/mmc/card.h> #include <linux/mmc/pm.h> #include <linux/dma-direction.h> -#include <linux/keyslot-manager.h> +#include <linux/blk-crypto-profile.h> struct mmc_ios { unsigned int clock; /* clock rate */ @@ -162,6 +162,9 @@ struct mmc_host_ops { /* Prepare HS400 target operating frequency depending host driver */ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); + /* Execute HS400 tuning depending host driver */ + int (*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Prepare switch to DDR during the HS400 init sequence */ int (*hs400_prepare_ddr)(struct mmc_host *host); @@ -492,7 +495,7 @@ struct mmc_host { /* Inline encryption support */ #ifdef CONFIG_MMC_CRYPTO - struct blk_keyslot_manager ksm; + struct blk_crypto_profile crypto_profile; #endif /* Host Software Queue support */ @@ -634,5 +637,6 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); +int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #endif /* LINUX_MMC_HOST_H */ diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h deleted file mode 100644 index 1d42872d22f3..000000000000 --- a/include/linux/mmc/sdhci-pci-data.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_MMC_SDHCI_PCI_DATA_H -#define LINUX_MMC_SDHCI_PCI_DATA_H - -struct pci_dev; - -struct sdhci_pci_data { - struct pci_dev *pdev; - int slotno; - int rst_n_gpio; /* Set to -EINVAL if unused */ - int cd_gpio; /* Set to -EINVAL if unused */ - int (*setup)(struct sdhci_pci_data *data); - void (*cleanup)(struct sdhci_pci_data *data); -}; - -extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, - int slotno); -#endif diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 1935d4c72d10..d7285f8148a3 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_BUG_ON_FOLIO(cond, folio) \ + do { \ + if (unlikely(cond)) { \ + dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\ + BUG(); \ + } \ + } while (0) #define VM_BUG_ON_VMA(cond, vma) \ do { \ if (unlikely(cond)) { \ @@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm); #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond) #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond) #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d6a4158a9a6..12c4177f7703 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -22,6 +22,7 @@ * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache */ +#define folio_test_fscache(folio) folio_test_private_2(folio) #define PageFsCache(page) PagePrivate2((page)) #define SetPageFsCache(page) SetPagePrivate2((page)) #define ClearPageFsCache(page) ClearPagePrivate2((page)) @@ -29,60 +30,80 @@ #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) /** - * set_page_fscache - Set PG_fscache on a page and take a ref - * @page: The page. + * folio_start_fscache - Start an fscache write on a folio. + * @folio: The folio. * - * Set the PG_fscache (PG_private_2) flag on a page and take the reference - * needed for the VM to handle its lifetime correctly. This sets the flag and - * takes the reference unconditionally, so care must be taken not to set the - * flag again if it's already set. + * Call this function before writing a folio to a local cache. Starting a + * second write before the first one finishes is not allowed. */ -static inline void set_page_fscache(struct page *page) +static inline void folio_start_fscache(struct folio *folio) { - set_page_private_2(page); + VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); + folio_get(folio); + folio_set_private_2(folio); } /** - * end_page_fscache - Clear PG_fscache and release any waiters - * @page: The page - * - * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers - * waiting for this. The page ref held for PG_private_2 being set is released. + * folio_end_fscache - End an fscache write on a folio. + * @folio: The folio. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be - * serialised. + * Call this function after the folio has been written to the local cache. + * This will wake any sleepers waiting on this folio. */ -static inline void end_page_fscache(struct page *page) +static inline void folio_end_fscache(struct folio *folio) { - end_page_private_2(page); + folio_end_private_2(folio); } /** - * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page. + * If this folio is currently being written to a local cache, wait for + * the write to finish. Another write may start after this one finishes, + * unless the caller holds the folio lock. */ -static inline void wait_on_page_fscache(struct page *page) +static inline void folio_wait_fscache(struct folio *folio) { - wait_on_page_private_2(page); + folio_wait_private_2(folio); } /** - * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a - * fatal signal is received by the calling task. + * If this folio is currently being written to a local cache, wait + * for the write to finish or for a fatal signal to be received. + * Another write may start after this one finishes, unless the caller + * holds the folio lock. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ +static inline int folio_wait_fscache_killable(struct folio *folio) +{ + return folio_wait_private_2_killable(folio); +} + +static inline void set_page_fscache(struct page *page) +{ + folio_start_fscache(page_folio(page)); +} + +static inline void end_page_fscache(struct page *page) +{ + folio_end_private_2(page_folio(page)); +} + +static inline void wait_on_page_fscache(struct page *page) +{ + folio_wait_private_2(page_folio(page)); +} + static inline int wait_on_page_fscache_killable(struct page *page) { - return wait_on_page_private_2_killable(page); + return folio_wait_private_2_killable(page_folio(page)); } enum netfs_read_source { diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2a38f2b477a5..cb909edb76c4 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -7,6 +7,7 @@ #define _NVME_FC_DRIVER_H 1 #include <linux/scatterlist.h> +#include <linux/blk-mq.h> /* @@ -497,6 +498,8 @@ struct nvme_fc_port_template { int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *rport, struct nvmefc_ls_rsp *ls_rsp); + void (*map_queues)(struct nvme_fc_local_port *localport, + struct blk_mq_queue_map *map); u32 max_hw_queues; u16 max_sgl_segments; @@ -779,6 +782,10 @@ struct nvmet_fc_target_port { * LS received. * Entrypoint is Mandatory. * + * @map_queues: This functions lets the driver expose the queue mapping + * to the block layer. + * Entrypoint is Optional. + * * @fcp_op: Called to perform a data transfer or transmit a response. * The nvmefc_tgt_fcp_req structure is the same LLDD-supplied * exchange structure specified in the nvmet_fc_rcv_fcp_req() call diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 3ec8e50efa16..4dd7e6fe92fb 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,6 +6,8 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H +#define NVME_RDMA_MAX_QUEUE_SIZE 128 + enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b7c4c4130b65..855dd9b3e84b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -27,8 +27,20 @@ #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { - NVME_NQN_DISC = 1, /* Discovery type target subsystem */ - NVME_NQN_NVME = 2, /* NVME type target subsystem */ + /* Referral to another discovery type target subsystem */ + NVME_NQN_DISC = 1, + + /* NVME type target subsystem */ + NVME_NQN_NVME = 2, + + /* Current discovery type target subsystem */ + NVME_NQN_CURR = 3, +}; + +enum nvme_ctrl_type { + NVME_CTRL_IO = 1, /* I/O controller */ + NVME_CTRL_DISC = 2, /* Discovery controller */ + NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; /* Address Family codes for Discovery Log Page entry ADRFAM field */ @@ -244,7 +256,9 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[28]; + __u8 rsvd100[11]; + __u8 cntrltype; + __u8 fguid[16]; __le16 crdt1; __le16 crdt2; __le16 crdt3; @@ -312,6 +326,7 @@ struct nvme_id_ctrl { }; enum { + NVME_CTRL_CMIC_MULTI_PORT = 1 << 0, NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, NVME_CTRL_CMIC_ANA = 1 << 3, NVME_CTRL_ONCS_COMPARE = 1 << 0, @@ -1303,6 +1318,12 @@ struct nvmf_common_command { #define MAX_DISC_LOGS 255 +/* Discovery log page entry flags (EFLAGS): */ +enum { + NVME_DISC_EFLAGS_EPCSD = (1 << 1), + NVME_DISC_EFLAGS_DUPRETINFO = (1 << 0), +}; + /* Discovery log page entry */ struct nvmf_disc_rsp_page_entry { __u8 trtype; @@ -1312,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry { __le16 portid; __le16 cntlid; __le16 asqsz; - __u8 resv8[22]; + __le16 eflags; + __u8 resv10[20]; char trsvcid[NVMF_TRSVCID_SIZE]; __u8 resv64[192]; char subnqn[NVMF_NQN_FIELD_LEN]; diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 923dada24eb4..c0c0cefc3b92 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_variable_le_u32(struct device *dev, + const char *cell_id, + u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int nvmem_cell_read_variable_le_u64(struct device *dev, + const char *cell_id, + u64 *val) +{ + return -EOPNOTSUPP; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 7e72d975cb76..aca52db2f3f3 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -66,6 +66,17 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. + */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + #else /* __ASSEMBLY__ */ /* @@ -127,6 +138,7 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index cf6a65b94d40..cf48983d3c86 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -65,7 +65,6 @@ extern int early_init_dt_scan_memory(unsigned long node, const char *uname, extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); -extern void __init early_init_dt_scan_chosen_arch(unsigned long node); extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern u64 dt_mem_next_cell(int s, const __be32 **cellp); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0f12345c21fb..4669632bd72b 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -6,12 +6,9 @@ #include <linux/limits.h> /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) __builtin_mul_overflow(__a, __b, __d); \ })) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d))) - -#define check_sub_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d))) - -#define check_mul_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d))) - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** check_shl_overflow() - Calculate a left-shifted value and check overflow * * @a: Value to be shifted diff --git a/include/linux/packing.h b/include/linux/packing.h index 54667735cc67..8d6571feb95d 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2018, NXP Semiconductors + * Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _LINUX_PACKING_H diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f..981341a3c3c4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -143,6 +143,8 @@ enum pageflags { #endif __NR_PAGEFLAGS, + PG_readahead = PG_reclaim, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -171,6 +173,15 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_workingset, +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_mappedtodisk, +#endif + /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -193,6 +204,34 @@ static inline unsigned long _compound_head(const struct page *page) #define compound_head(page) ((typeof(page))_compound_head(page)) +/** + * page_folio - Converts from page to folio. + * @p: The page. + * + * Every page is part of a folio. This function cannot be called on a + * NULL pointer. + * + * Context: No reference, nor lock is required on @page. If the caller + * does not hold a reference, this call may race with a folio split, so + * it should re-check the folio still contains this page after gaining + * a reference on the folio. + * Return: The folio which contains this page. + */ +#define page_folio(p) (_Generic((p), \ + const struct page *: (const struct folio *)_compound_head(p), \ + struct page *: (struct folio *)_compound_head(p))) + +/** + * folio_page - Return a page from a folio. + * @folio: The folio. + * @n: The page number to return. + * + * @n is relative to the start of the folio. This function does not + * check that the page number lies within @folio; the caller is presumed + * to have a reference to the page. + */ +#define folio_page(folio, n) nth_page(&(folio)->page, n) + static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; @@ -217,6 +256,15 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static unsigned long *folio_flags(struct folio *folio, unsigned n) +{ + struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + /* * Page flags policies wrt compound pages * @@ -261,36 +309,64 @@ static inline void page_init_poison(struct page *page, size_t size) VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) +/* Which page is the flag stored in */ +#define FOLIO_PF_ANY 0 +#define FOLIO_PF_HEAD 0 +#define FOLIO_PF_ONLY_HEAD 0 +#define FOLIO_PF_NO_TAIL 0 +#define FOLIO_PF_NO_COMPOUND 0 +#define FOLIO_PF_SECOND 1 + /* * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ +static __always_inline bool folio_test_##lname(struct folio *folio) \ +{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int Page##uname(struct page *page) \ - { return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_set_##lname(struct folio *folio) \ +{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void SetPage##uname(struct page *page) \ - { set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_clear_##lname(struct folio *folio) \ +{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_set_##lname(struct folio *folio) \ +{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_clear_##lname(struct folio *folio) \ +{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_set_##lname(struct folio *folio) \ +{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_clear_##lname(struct folio *folio) \ +{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -306,29 +382,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) -#define TESTPAGEFLAG_FALSE(uname) \ +#define TESTPAGEFLAG_FALSE(uname, lname) \ +static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \ static inline int Page##uname(const struct page *page) { return 0; } -#define SETPAGEFLAG_NOOP(uname) \ +#define SETPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_set_##lname(struct folio *folio) { } \ static inline void SetPage##uname(struct page *page) { } -#define CLEARPAGEFLAG_NOOP(uname) \ +#define CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_clear_##lname(struct folio *folio) { } \ static inline void ClearPage##uname(struct page *page) { } -#define __CLEARPAGEFLAG_NOOP(uname) \ +#define __CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void __folio_clear_##lname(struct folio *folio) { } \ static inline void __ClearPage##uname(struct page *page) { } -#define TESTSETFLAG_FALSE(uname) \ +#define TESTSETFLAG_FALSE(uname, lname) \ +static inline bool folio_test_set_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestSetPage##uname(struct page *page) { return 0; } -#define TESTCLEARFLAG_FALSE(uname) \ +#define TESTCLEARFLAG_FALSE(uname, lname) \ +static inline bool folio_test_clear_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestClearPage##uname(struct page *page) { return 0; } -#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ - SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) +#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ + SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) -#define TESTSCFLAG_FALSE(uname) \ - TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) +#define TESTSCFLAG_FALSE(uname, lname) \ + TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) @@ -384,8 +468,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) + TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* @@ -394,22 +478,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else -PAGEFLAG_FALSE(HighMem) +PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline int PageSwapCache(struct page *page) +static __always_inline bool folio_test_swapcache(struct folio *folio) { -#ifdef CONFIG_THP_SWAP - page = compound_head(page); -#endif - return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + return folio_test_swapbacked(folio) && + test_bit(PG_swapcache, folio_flags(folio, 0)); +} +static __always_inline bool PageSwapCache(struct page *page) +{ + return folio_test_swapcache(page_folio(page)); } + SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else -PAGEFLAG_FALSE(SwapCache) +PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) @@ -421,14 +508,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else -PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) - TESTSCFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) + TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else -PAGEFLAG_FALSE(Uncached) +PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE @@ -437,7 +524,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) extern bool take_page_off_buddy(struct page *page); #else -PAGEFLAG_FALSE(HWPoison) +PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif @@ -451,7 +538,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #ifdef CONFIG_KASAN_HW_TAGS PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) #else -PAGEFLAG_FALSE(SkipKASanPoison) +PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) #endif /* @@ -489,10 +576,14 @@ static __always_inline int PageMappingFlags(struct page *page) return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageAnon(struct page *page) +static __always_inline bool folio_test_anon(struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline bool PageAnon(struct page *page) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return folio_test_anon(page_folio(page)); } static __always_inline int __PageMovable(struct page *page) @@ -508,30 +599,32 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline int PageKsm(struct page *page) +static __always_inline bool folio_test_ksm(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } + +static __always_inline bool PageKsm(struct page *page) +{ + return folio_test_ksm(page_folio(page)); +} #else -TESTPAGEFLAG_FALSE(Ksm) +TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); -static inline int PageUptodate(struct page *page) +static inline bool folio_test_uptodate(struct folio *folio) { - int ret; - page = compound_head(page); - ret = test_bit(PG_uptodate, &(page)->flags); + bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); /* - * Must ensure that the data we read out of the page is loaded - * _after_ we've loaded page->flags to check for PageUptodate. - * We can skip the barrier if the page is not uptodate, because + * Must ensure that the data we read out of the folio is loaded + * _after_ we've loaded folio->flags to check the uptodate bit. + * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * - * See SetPageUptodate() for the other side of the story. + * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); @@ -539,47 +632,71 @@ static inline int PageUptodate(struct page *page) return ret; } -static __always_inline void __SetPageUptodate(struct page *page) +static inline int PageUptodate(struct page *page) +{ + return folio_test_uptodate(page_folio(page)); +} + +static __always_inline void __folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); - __set_bit(PG_uptodate, &page->flags); + __set_bit(PG_uptodate, folio_flags(folio, 0)); } -static __always_inline void SetPageUptodate(struct page *page) +static __always_inline void folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); /* * Memory barrier must be issued before setting the PG_uptodate bit, - * so that all previous stores issued in order to bring the page - * uptodate are actually visible before PageUptodate becomes true. + * so that all previous stores issued in order to bring the folio + * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); - set_bit(PG_uptodate, &page->flags); + set_bit(PG_uptodate, folio_flags(folio, 0)); +} + +static __always_inline void __SetPageUptodate(struct page *page) +{ + __folio_mark_uptodate((struct folio *)page); +} + +static __always_inline void SetPageUptodate(struct page *page) +{ + folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int test_clear_page_writeback(struct page *page); -int __test_set_page_writeback(struct page *page, bool keep_write); +bool __folio_start_writeback(struct folio *folio, bool keep_write); +bool set_page_writeback(struct page *page); -#define test_set_page_writeback(page) \ - __test_set_page_writeback(page, false) -#define test_set_page_writeback_keepwrite(page) \ - __test_set_page_writeback(page, true) +#define folio_start_writeback(folio) \ + __folio_start_writeback(folio, false) +#define folio_start_writeback_keepwrite(folio) \ + __folio_start_writeback(folio, true) -static inline void set_page_writeback(struct page *page) +static inline void set_page_writeback_keepwrite(struct page *page) { - test_set_page_writeback(page); + folio_start_writeback_keepwrite(page_folio(page)); } -static inline void set_page_writeback_keepwrite(struct page *page) +static inline bool test_set_page_writeback(struct page *page) { - test_set_page_writeback_keepwrite(page); + return set_page_writeback(page); } __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +/* Whether there are one or multiple pages in a folio */ +static inline bool folio_test_single(struct folio *folio) +{ + return !folio_test_head(folio); +} + +static inline bool folio_test_multi(struct folio *folio) +{ + return folio_test_head(folio); +} + static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); @@ -603,12 +720,15 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); +static inline bool folio_test_hugetlb(struct folio *folio) +{ + return PageHeadHuge(&folio->page); +} #else -TESTPAGEFLAG_FALSE(Huge) -TESTPAGEFLAG_FALSE(HeadHuge) +TESTPAGEFLAG_FALSE(Huge, hugetlb) +TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -624,6 +744,11 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } +static inline bool folio_test_transhuge(struct folio *folio) +{ + return folio_test_head(folio); +} + /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -660,12 +785,26 @@ static inline int PageTransTail(struct page *page) PAGEFLAG(DoubleMap, double_map, PF_SECOND) TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else -TESTPAGEFLAG_FALSE(TransHuge) -TESTPAGEFLAG_FALSE(TransCompound) -TESTPAGEFLAG_FALSE(TransCompoundMap) -TESTPAGEFLAG_FALSE(TransTail) -PAGEFLAG_FALSE(DoubleMap) - TESTSCFLAG_FALSE(DoubleMap) +TESTPAGEFLAG_FALSE(TransHuge, transhuge) +TESTPAGEFLAG_FALSE(TransCompound, transcompound) +TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) +TESTPAGEFLAG_FALSE(TransTail, transtail) +PAGEFLAG_FALSE(DoubleMap, double_map) + TESTSCFLAG_FALSE(DoubleMap, double_map) +#endif + +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* @@ -849,6 +988,11 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_has_private(struct folio *folio) +{ + return page_has_private(&folio->page); +} + #undef PF_ANY #undef PF_HEAD #undef PF_ONLY_HEAD diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index d8a6aecf99cb..83abf95e9fa7 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -8,46 +8,16 @@ #ifdef CONFIG_PAGE_IDLE_FLAG -#ifdef CONFIG_64BIT -static inline bool page_is_young(struct page *page) -{ - return PageYoung(page); -} - -static inline void set_page_young(struct page *page) -{ - SetPageYoung(page); -} - -static inline bool test_and_clear_page_young(struct page *page) -{ - return TestClearPageYoung(page); -} - -static inline bool page_is_idle(struct page *page) -{ - return PageIdle(page); -} - -static inline void set_page_idle(struct page *page) -{ - SetPageIdle(page); -} - -static inline void clear_page_idle(struct page *page) -{ - ClearPageIdle(page); -} -#else /* !CONFIG_64BIT */ +#ifndef CONFIG_64BIT /* * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. */ extern struct page_ext_operations page_idle_ops; -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page) return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page) set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page) return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page) return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page) set_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } -#endif /* CONFIG_64BIT */ +#endif /* !CONFIG_64BIT */ #else /* !CONFIG_PAGE_IDLE_FLAG */ -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { return false; } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { return false; } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { return false; } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { } #endif /* CONFIG_PAGE_IDLE_FLAG */ +static inline bool page_is_young(struct page *page) +{ + return folio_test_young(page_folio(page)); +} + +static inline void set_page_young(struct page *page) +{ + folio_set_young(page_folio(page)); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return folio_test_clear_young(page_folio(page)); +} + +static inline bool page_is_idle(struct page *page) +{ + return folio_test_idle(page_folio(page)); +} + +static inline void set_page_idle(struct page *page) +{ + folio_set_idle(page_folio(page)); +} + +static inline void clear_page_idle(struct page *page) +{ + folio_clear_idle(page_folio(page)); +} #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 719bfe5108c5..43c638c51c1f 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); -extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr) if (static_branch_unlikely(&page_owner_inited)) __split_page_owner(page, nr); } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { if (static_branch_unlikely(&page_owner_inited)) - __copy_page_owner(oldpage, newpage); + __folio_copy_owner(newfolio, old); } static inline void set_page_owner_migrate_reason(struct page *page, int reason) { @@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page, unsigned int order) { } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } static inline void set_page_owner_migrate_reason(struct page *page, int reason) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 7ad46f45df39..2e677e6ad09f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page) return atomic_read(&page->_refcount); } +/** + * folio_ref_count - The reference count on this folio. + * @folio: The folio. + * + * The refcount is usually incremented by calls to folio_get() and + * decremented by calls to folio_put(). Some typical users of the + * folio refcount: + * + * - Each reference from a page table + * - The page cache + * - Filesystem private data + * - The LRU list + * - Pipes + * - Direct IO which references this page in the process address space + * + * Return: The number of references to this folio. + */ +static inline int folio_ref_count(const struct folio *folio) +{ + return page_ref_count(&folio->page); +} + static inline int page_count(const struct page *page) { - return atomic_read(&compound_head(page)->_refcount); + return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) @@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v) __page_ref_set(page, v); } +static inline void folio_set_count(struct folio *folio, int v) +{ + set_page_count(&folio->page, v); +} + /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) @@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr) __page_ref_mod(page, nr); } +static inline void folio_ref_add(struct folio *folio, int nr) +{ + page_ref_add(&folio->page, nr); +} + static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); @@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline void folio_ref_sub(struct folio *folio, int nr) +{ + page_ref_sub(&folio->page, nr); +} + static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); @@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_return(struct folio *folio, int nr) +{ + return page_ref_sub_return(&folio->page, nr); +} + static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); @@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page) __page_ref_mod(page, 1); } +static inline void folio_ref_inc(struct folio *folio) +{ + page_ref_inc(&folio->page); +} + static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); @@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page) __page_ref_mod(page, -1); } +static inline void folio_ref_dec(struct folio *folio) +{ + page_ref_dec(&folio->page); +} + static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); @@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_and_test(struct folio *folio, int nr) +{ + return page_ref_sub_and_test(&folio->page, nr); +} + static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); @@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page) return ret; } +static inline int folio_ref_inc_return(struct folio *folio) +{ + return page_ref_inc_return(&folio->page); +} + static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); @@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page) return ret; } +static inline int folio_ref_dec_and_test(struct folio *folio) +{ + return page_ref_dec_and_test(&folio->page); +} + static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); @@ -161,15 +228,91 @@ static inline int page_ref_dec_return(struct page *page) return ret; } -static inline int page_ref_add_unless(struct page *page, int nr, int u) +static inline int folio_ref_dec_return(struct folio *folio) +{ + return page_ref_dec_return(&folio->page); +} + +static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - int ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); return ret; } +static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) +{ + return page_ref_add_unless(&folio->page, nr, u); +} + +/** + * folio_try_get - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * If you do not already have a reference to a folio, you can attempt to + * get one using this function. It may fail if, for example, the folio + * has been freed since you found a pointer to it, or it is frozen for + * the purposes of splitting or migration. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get(struct folio *folio) +{ + return folio_ref_add_unless(folio, 1, 0); +} + +static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) +{ +#ifdef CONFIG_TINY_RCU + /* + * The caller guarantees the folio will not be freed from interrupt + * context, so (on !SMP) we only need preemption to be disabled + * and TINY_RCU does that for us. + */ +# ifdef CONFIG_PREEMPT_COUNT + VM_BUG_ON(!in_atomic() && !irqs_disabled()); +# endif + VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); + folio_ref_add(folio, count); +#else + if (unlikely(!folio_ref_add_unless(folio, count, 0))) { + /* Either the folio has been freed, or will be freed. */ + return false; + } +#endif + return true; +} + +/** + * folio_try_get_rcu - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * This is a version of folio_try_get() optimised for non-SMP kernels. + * If you are still holding the rcu_read_lock() after looking up the + * page and know that the page cannot have its refcount decreased to + * zero in interrupt context, you can use this instead of folio_try_get(). + * + * Example users include get_user_pages_fast() (as pages are not unmapped + * from interrupt context) and the page cache lookups (as pages are not + * truncated from interrupt context). We also know that pages are not + * frozen in interrupt context for the purposes of splitting or migration. + * + * You can also use this function if you're holding a lock that prevents + * pages being frozen & removed; eg the i_pages lock for the page cache + * or the mmap_sem or page table lock for page tables. In this case, + * it will always succeed, and you could have used a plain folio_get(), + * but it's sometimes more convenient to have a common function called + * from both locked and RCU-protected contexts. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get_rcu(struct folio *folio) +{ + return folio_ref_try_add_rcu(folio, 1); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); @@ -179,6 +322,11 @@ static inline int page_ref_freeze(struct page *page, int count) return ret; } +static inline int folio_ref_freeze(struct folio *folio, int count) +{ + return page_ref_freeze(&folio->page, count); +} + static inline void page_ref_unfreeze(struct page *page, int count) { VM_BUG_ON_PAGE(page_count(page) != 0, page); @@ -189,4 +337,8 @@ static inline void page_ref_unfreeze(struct page *page, int count) __page_ref_unfreeze(page, count); } +static inline void folio_ref_unfreeze(struct folio *folio, int count) +{ + page_ref_unfreeze(&folio->page, count); +} #endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 62db6b0176b9..013cdc90f5fd 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -162,149 +162,119 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) void release_pages(struct page **pages, int nr); -/* - * For file cache pages, return the address_space, otherwise return NULL +struct address_space *page_mapping(struct page *); +struct address_space *folio_mapping(struct folio *); +struct address_space *swapcache_mapping(struct folio *); + +/** + * folio_file_mapping - Find the mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the mapping of the + * swap file or swap device where the data is stored. This is different + * from the mapping returned by folio_mapping(). The only reason to + * use it is if, like NFS, you return 0 from ->activate_swapfile. + * + * Do not call this for folios which aren't in the page cache or swap cache. */ -static inline struct address_space *page_mapping_file(struct page *page) +static inline struct address_space *folio_file_mapping(struct folio *folio) { - if (unlikely(PageSwapCache(page))) - return NULL; - return page_mapping(page); + if (unlikely(folio_test_swapcache(folio))) + return swapcache_mapping(folio); + + return folio->mapping; +} + +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); } /* - * speculatively take a reference to a page. - * If the page is free (_refcount == 0), then _refcount is untouched, and 0 - * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned. - * - * This function must be called inside the same rcu_read_lock() section as has - * been used to lookup the page in the pagecache radix-tree (or page table): - * this allows allocators to use a synchronize_rcu() to stabilize _refcount. - * - * Unless an RCU grace period has passed, the count of all pages coming out - * of the allocator must be considered unstable. page_count may return higher - * than expected, and put_page must be able to do the right thing when the - * page has been finished with, no matter what it is subsequently allocated - * for (because put_page is what is used here to drop an invalid speculative - * reference). - * - * This is the interesting part of the lockless pagecache (and lockless - * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) - * has the following pattern: - * 1. find page in radix tree - * 2. conditionally increment refcount - * 3. check the page is still in pagecache (if no, goto 1) - * - * Remove-side that cares about stability of _refcount (eg. reclaim) has the - * following (with the i_pages lock held): - * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) - * B. remove page from pagecache - * C. free the page - * - * There are 2 critical interleavings that matter: - * - 2 runs before A: in this case, A sees elevated refcount and bails out - * - A runs before 2: in this case, 2 sees zero refcount and retries; - * subsequently, B will complete and 1 will find no page, causing the - * lookup to return NULL. - * - * It is possible that between 1 and 2, the page is removed then the exact same - * page is inserted into the same position in pagecache. That's OK: the - * old find_get_page using a lock could equally have run before or after - * such a re-insertion, depending on order that locks are granted. - * - * Lookups racing against pagecache insertion isn't a big problem: either 1 - * will find the page or it will not. Likewise, the old find_get_page could run - * either before the insertion or afterwards, depending on timing. + * For file cache pages, return the address_space, otherwise return NULL */ -static inline int __page_cache_add_speculative(struct page *page, int count) +static inline struct address_space *page_mapping_file(struct page *page) { -#ifdef CONFIG_TINY_RCU -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - /* - * Preempt must be disabled here - we rely on rcu_read_lock doing - * this for us. - * - * Pagecache won't be truncated from interrupt context, so if we have - * found a page in the radix tree here, we have pinned its refcount by - * disabling preempt, and hence no need for the "speculative get" that - * SMP requires. - */ - VM_BUG_ON_PAGE(page_count(page) == 0, page); - page_ref_add(page, count); + struct folio *folio = page_folio(page); -#else - if (unlikely(!page_ref_add_unless(page, count, 0))) { - /* - * Either the page has been freed, or will be freed. - * In either case, retry here and the caller should - * do the right thing (see comments above). - */ - return 0; - } -#endif - VM_BUG_ON_PAGE(PageTail(page), page); - - return 1; + if (unlikely(folio_test_swapcache(folio))) + return NULL; + return folio_mapping(folio); } -static inline int page_cache_get_speculative(struct page *page) +static inline bool page_cache_add_speculative(struct page *page, int count) { - return __page_cache_add_speculative(page, 1); + VM_BUG_ON_PAGE(PageTail(page), page); + return folio_ref_try_add_rcu((struct folio *)page, count); } -static inline int page_cache_add_speculative(struct page *page, int count) +static inline bool page_cache_get_speculative(struct page *page) { - return __page_cache_add_speculative(page, count); + return page_cache_add_speculative(page, 1); } /** - * attach_page_private - Attach private data to a page. - * @page: Page to attach data to. - * @data: Data to attach to page. + * folio_attach_private - Attach private data to a folio. + * @folio: Folio to attach data to. + * @data: Data to attach to folio. * - * Attaching private data to a page increments the page's reference count. - * The data must be detached before the page will be freed. + * Attaching private data to a folio increments the page's reference count. + * The data must be detached before the folio will be freed. */ -static inline void attach_page_private(struct page *page, void *data) +static inline void folio_attach_private(struct folio *folio, void *data) { - get_page(page); - set_page_private(page, (unsigned long)data); - SetPagePrivate(page); + folio_get(folio); + folio->private = data; + folio_set_private(folio); } /** - * detach_page_private - Detach private data from a page. - * @page: Page to detach data from. + * folio_detach_private - Detach private data from a folio. + * @folio: Folio to detach data from. * - * Removes the data that was previously attached to the page and decrements + * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * - * Return: Data that was attached to the page. + * Return: Data that was attached to the folio. */ -static inline void *detach_page_private(struct page *page) +static inline void *folio_detach_private(struct folio *folio) { - void *data = (void *)page_private(page); + void *data = folio_get_private(folio); - if (!PagePrivate(page)) + if (!folio_test_private(folio)) return NULL; - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); + folio_clear_private(folio); + folio->private = NULL; + folio_put(folio); return data; } +static inline void attach_page_private(struct page *page, void *data) +{ + folio_attach_private(page_folio(page), data); +} + +static inline void *detach_page_private(struct page *page) +{ + return folio_detach_private(page_folio(page)); +} + #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } #endif +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return &filemap_alloc_folio(gfp, 0)->page; +} + static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); @@ -331,9 +301,28 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_FOR_MMAP 0x00000040 #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 +#define FGP_STABLE 0x00000200 -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask); +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); + +/** + * filemap_get_folio - Find and get a folio. + * @mapping: The address_space to search. + * @index: The page index. + * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct folio *filemap_get_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, 0, 0); +} /** * find_get_page - find and get a page reference @@ -377,25 +366,6 @@ static inline struct page *find_lock_page(struct address_space *mapping, } /** - * find_lock_head - Locate, pin and lock a pagecache page. - * @mapping: The address_space to search. - * @index: The page index. - * - * Looks up the page cache entry at @mapping & @index. If there is a - * page cache page, its head page is returned locked and with an increased - * refcount. - * - * Context: May sleep. - * Return: A struct page which is !PageTail, or %NULL if there is no page - * in the cache for this index. - */ -static inline struct page *find_lock_head(struct address_space *mapping, - pgoff_t index) -{ - return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); -} - -/** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space * @index: the page's index into the mapping @@ -452,6 +422,73 @@ static inline bool thp_contains(struct page *head, pgoff_t index) return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); } +#define swapcache_index(folio) __page_file_index(&(folio)->page) + +/** + * folio_index - File index of a folio. + * @folio: The folio. + * + * For a folio which is either in the page cache or the swap cache, + * return its index within the address_space it belongs to. If you know + * the page is definitely in the page cache, you can look at the folio's + * index directly. + * + * Return: The index (offset in units of pages) of a folio in its file. + */ +static inline pgoff_t folio_index(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_index(folio); + return folio->index; +} + +/** + * folio_next_index - Get the index of the next folio. + * @folio: The current folio. + * + * Return: The index of the folio which follows this folio in the file. + */ +static inline pgoff_t folio_next_index(struct folio *folio) +{ + return folio->index + folio_nr_pages(folio); +} + +/** + * folio_file_page - The page for a particular index. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Sometimes after looking up a folio in the page cache, we need to + * obtain the specific page for an index (eg a page fault). + * + * Return: The page containing the file data for this index. + */ +static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return &folio->page; + return folio_page(folio, index & (folio_nr_pages(folio) - 1)); +} + +/** + * folio_contains - Does this folio contain this index? + * @folio: The folio. + * @index: The page index within the file. + * + * Context: The caller should have the page locked in order to prevent + * (eg) shmem from moving the page between the page cache and swap cache + * and changing its index in the middle of the operation. + * Return: true or false. + */ +static inline bool folio_contains(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return folio->index == index; + return index - folio_index(folio) < folio_nr_pages(folio); +} + /* * Given the page we found in the page cache, return the page corresponding * to this index in the file @@ -560,6 +597,27 @@ static inline loff_t page_file_offset(struct page *page) return ((loff_t)page_index(page)) << PAGE_SHIFT; } +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + */ +static inline loff_t folio_pos(struct folio *folio) +{ + return page_offset(&folio->page); +} + +/** + * folio_file_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + * + * This differs from folio_pos() for folios which belong to a swap file. + * NFS is the only filesystem today which needs to use folio_file_pos(). + */ +static inline loff_t folio_file_pos(struct folio *folio) +{ + return page_file_offset(&folio->page); +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); @@ -575,13 +633,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } struct wait_page_key { - struct page *page; + struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { - struct page *page; + struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; @@ -589,7 +647,7 @@ struct wait_page_queue { static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { - if (wait_page->page != key->page) + if (wait_page->folio != key->folio) return false; key->page_match = 1; @@ -599,20 +657,31 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, return true; } -extern void __lock_page(struct page *page); -extern int __lock_page_killable(struct page *page); -extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); -extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +void __folio_lock(struct folio *folio); +int __folio_lock_killable(struct folio *folio); +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags); -extern void unlock_page(struct page *page); +void unlock_page(struct page *page); +void folio_unlock(struct folio *folio); + +static inline bool folio_trylock(struct folio *folio) +{ + return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); +} /* * Return true if the page was successfully locked */ static inline int trylock_page(struct page *page) { - page = compound_head(page); - return (likely(!test_and_set_bit_lock(PG_locked, &page->flags))); + return folio_trylock(page_folio(page)); +} + +static inline void folio_lock(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* @@ -620,38 +689,30 @@ static inline int trylock_page(struct page *page) */ static inline void lock_page(struct page *page) { + struct folio *folio; might_sleep(); - if (!trylock_page(page)) - __lock_page(page); + + folio = page_folio(page); + if (!folio_trylock(folio)) + __folio_lock(folio); } -/* - * lock_page_killable is like lock_page but can be interrupted by fatal - * signals. It returns 0 if it locked the page and -EINTR if it was - * killed while waiting. - */ -static inline int lock_page_killable(struct page *page) +static inline int folio_lock_killable(struct folio *folio) { might_sleep(); - if (!trylock_page(page)) - return __lock_page_killable(page); + if (!folio_trylock(folio)) + return __folio_lock_killable(folio); return 0; } /* - * lock_page_async - Lock the page, unless this would block. If the page - * is already locked, then queue a callback when the page becomes unlocked. - * This callback can then retry the operation. - * - * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page - * was already locked and the callback defined in 'wait' was queued. + * lock_page_killable is like lock_page but can be interrupted by fatal + * signals. It returns 0 if it locked the page and -EINTR if it was + * killed while waiting. */ -static inline int lock_page_async(struct page *page, - struct wait_page_queue *wait) +static inline int lock_page_killable(struct page *page) { - if (!trylock_page(page)) - return __lock_page_async(page, wait); - return 0; + return folio_lock_killable(page_folio(page)); } /* @@ -659,78 +720,108 @@ static inline int lock_page_async(struct page *page, * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see - * __lock_page_or_retry(). + * __folio_lock_or_retry(). */ -static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, +static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio; might_sleep(); - return trylock_page(page) || __lock_page_or_retry(page, mm, flags); + + folio = page_folio(page); + return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ -extern void wait_on_page_bit(struct page *page, int bit_nr); -extern int wait_on_page_bit_killable(struct page *page, int bit_nr); +void folio_wait_bit(struct folio *folio, int bit_nr); +int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* - * Wait for a page to be unlocked. + * Wait for a folio to be unlocked. * - * This must be called with the caller "holding" the page, - * ie with increased "page->count" so that the page won't + * This must be called with the caller "holding" the folio, + * ie with increased "page->count" so that the folio won't * go away during the wait.. */ +static inline void folio_wait_locked(struct folio *folio) +{ + if (folio_test_locked(folio)) + folio_wait_bit(folio, PG_locked); +} + +static inline int folio_wait_locked_killable(struct folio *folio) +{ + if (!folio_test_locked(folio)) + return 0; + return folio_wait_bit_killable(folio, PG_locked); +} + static inline void wait_on_page_locked(struct page *page) { - if (PageLocked(page)) - wait_on_page_bit(compound_head(page), PG_locked); + folio_wait_locked(page_folio(page)); } static inline int wait_on_page_locked_killable(struct page *page) { - if (!PageLocked(page)) - return 0; - return wait_on_page_bit_killable(compound_head(page), PG_locked); + return folio_wait_locked_killable(page_folio(page)); } int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); -int wait_on_page_writeback_killable(struct page *page); -extern void end_page_writeback(struct page *page); +void folio_wait_writeback(struct folio *folio); +int folio_wait_writeback_killable(struct folio *folio); +void end_page_writeback(struct page *page); +void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); +void folio_wait_stable(struct folio *folio); +void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); +static inline void __set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + __folio_mark_dirty(page_folio(page), mapping, warn); +} +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, + struct bdi_writeback *wb); +static inline void account_page_cleaned(struct page *page, + struct address_space *mapping, struct bdi_writeback *wb) +{ + return folio_account_cleaned(page_folio(page), mapping, wb); +} +void __folio_cancel_dirty(struct folio *folio); +static inline void folio_cancel_dirty(struct folio *folio) +{ + /* Avoid atomic ops, locking, etc. when not actually needed. */ + if (folio_test_dirty(folio)) + __folio_cancel_dirty(folio); +} +static inline void cancel_dirty_page(struct page *page) +{ + folio_cancel_dirty(page_folio(page)); +} +bool folio_clear_dirty_for_io(struct folio *folio); +bool clear_page_dirty_for_io(struct page *page); +int __must_check folio_write_one(struct folio *folio); +static inline int __must_check write_one_page(struct page *page) +{ + return folio_write_one(page_folio(page)); +} -void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); void page_endio(struct page *page, bool is_write, int err); -/** - * set_page_private_2 - Set PG_private_2 on a page and take a ref - * @page: The page. - * - * Set the PG_private_2 flag on a page and take the reference needed for the VM - * to handle its lifetime correctly. This sets the flag and takes the - * reference unconditionally, so care must be taken not to set the flag again - * if it's already set. - */ -static inline void set_page_private_2(struct page *page) -{ - page = compound_head(page); - get_page(page); - SetPagePrivate2(page); -} - -void end_page_private_2(struct page *page); -void wait_on_page_private_2(struct page *page); -int wait_on_page_private_2_killable(struct page *page); +void folio_end_private_2(struct folio *folio); +void folio_wait_private_2(struct folio *folio); +int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue */ -extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. @@ -790,9 +881,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp); extern void delete_from_page_cache(struct page *page); extern void __delete_from_page_cache(struct page *page, void *shadow); void replace_page_cache_page(struct page *old, struct page *new); @@ -817,6 +910,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +/* Must be non-static for BPF error injection */ +int __filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp, void **shadowp); + /** * struct readahead_control - Describes a readahead request. * @@ -906,33 +1003,57 @@ void page_cache_async_readahead(struct address_space *mapping, page_cache_async_ra(&ractl, page, req_count); } +static inline struct folio *__readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio; + + BUG_ON(ractl->_batch_count > ractl->_nr_pages); + ractl->_nr_pages -= ractl->_batch_count; + ractl->_index += ractl->_batch_count; + + if (!ractl->_nr_pages) { + ractl->_batch_count = 0; + return NULL; + } + + folio = xa_load(&ractl->mapping->i_pages, ractl->_index); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + ractl->_batch_count = folio_nr_pages(folio); + + return folio; +} + /** * readahead_page - Get the next page to read. - * @rac: The current readahead request. + * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: A pointer to the next page, or %NULL if we are done. */ -static inline struct page *readahead_page(struct readahead_control *rac) +static inline struct page *readahead_page(struct readahead_control *ractl) { - struct page *page; - - BUG_ON(rac->_batch_count > rac->_nr_pages); - rac->_nr_pages -= rac->_batch_count; - rac->_index += rac->_batch_count; + struct folio *folio = __readahead_folio(ractl); - if (!rac->_nr_pages) { - rac->_batch_count = 0; - return NULL; - } + return &folio->page; +} - page = xa_load(&rac->mapping->i_pages, rac->_index); - VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = thp_nr_pages(page); +/** + * readahead_folio - Get the next folio to read. + * @ractl: The current readahead request. + * + * Context: The folio is locked. The caller should unlock the folio once + * all I/O to that folio has completed. + * Return: A pointer to the next folio, or %NULL if we are done. + */ +static inline struct folio *readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio = __readahead_folio(ractl); - return page; + if (folio) + folio_put(folio); + return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, @@ -1040,6 +1161,34 @@ static inline unsigned long dir_pages(struct inode *inode) } /** + * folio_mkwrite_check_truncate - check if folio was truncated + * @folio: the folio to check + * @inode: the inode to check the folio against + * + * Return: the number of bytes in the folio up to EOF, + * or -EFAULT if the folio was truncated. + */ +static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, + struct inode *inode) +{ + loff_t size = i_size_read(inode); + pgoff_t index = size >> PAGE_SHIFT; + size_t offset = offset_in_folio(folio, size); + + if (!folio->mapping) + return -EFAULT; + + /* folio is wholly inside EOF */ + if (folio_next_index(folio) - 1 < index) + return folio_size(folio); + /* folio is wholly past EOF */ + if (folio->index > index || !offset) + return -EFAULT; + /* folio is partially inside EOF */ + return offset; +} + +/** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check * @inode: the inode to check the page against @@ -1068,19 +1217,25 @@ static inline int page_mkwrite_check_truncate(struct page *page, } /** - * i_blocks_per_page - How many blocks fit in this page. + * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. - * @page: The page (head page if the page is a THP). + * @folio: The folio. * - * If the block size is larger than the size of this page, return zero. + * If the block size is larger than the size of this folio, return zero. * - * Context: The caller should hold a refcount on the page to prevent it + * Context: The caller should hold a refcount on the folio to prevent it * from being split. - * Return: The number of filesystem blocks covered by this page. + * Return: The number of filesystem blocks covered by this folio. */ static inline +unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +{ + return folio_size(folio) >> inode->i_blkbits; +} + +static inline unsigned int i_blocks_per_page(struct inode *inode, struct page *page) { - return thp_size(page) >> inode->i_blkbits; + return i_blocks_per_folio(inode, page_folio(page)); } #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index d2558121d48c..6f7949b2fd8d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -3,6 +3,7 @@ #define _LINUX_PART_STAT_H #include <linux/genhd.h> +#include <asm/local.h> struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index ae16a9856305..b31d3f3312ce 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -267,6 +267,28 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) } /** + * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the + * caller is responsible for taking RCU. + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count; + bool ret = false; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (likely(__ref_is_percpu(ref, &percpu_count))) { + this_cpu_inc(*percpu_count); + ret = true; + } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { + ret = atomic_long_inc_not_zero(&ref->data->count); + } + return ret; +} + +/** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * @@ -283,20 +305,11 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { - unsigned long __percpu *percpu_count; bool ret = false; rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) { - this_cpu_inc(*percpu_count); - ret = true; - } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { - ret = atomic_long_inc_not_zero(&ref->data->count); - } - + ret = percpu_ref_tryget_live_rcu(ref); rcu_read_unlock(); - return ret; } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1..2512e2f9cd4e 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +#ifdef CONFIG_KVM +void kvm_host_pmu_init(struct arm_pmu *pmu); +#else +#define kvm_host_pmu_init(x) do { } while(0) +#endif + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe156a8170aa..22212f2a8219 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,9 @@ struct perf_event { /* * timestamp shadows the actual context timing but it can * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in. + * context time as it was when the event was last scheduled in, + * or when ctx_sched_in failed to schedule the event because we + * run out of PMC. * * ctx_time already accounts for ctx->timestamp. Therefore to * compute ctx_time for a sample, simply add perf_clock(). @@ -1398,6 +1400,7 @@ perf_event_addr_filters(struct perf_event *event) } extern void perf_event_addr_filters_sync(struct perf_event *event); +extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); extern int perf_output_begin(struct perf_output_handle *handle, struct perf_sample_data *data, diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 6beb26b7151d..86be8bf27b41 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,8 @@ #include <linux/mm.h> +#define ARCH_DEFAULT_PKEY 0 + #ifdef CONFIG_ARCH_HAS_PKEYS #include <asm/pkeys.h> #else /* ! CONFIG_ARCH_HAS_PKEYS */ diff --git a/include/linux/platform_data/usb-omap1.h b/include/linux/platform_data/usb-omap1.h index 43b5ce139c37..878e572a78bf 100644 --- a/include/linux/platform_data/usb-omap1.h +++ b/include/linux/platform_data/usb-omap1.h @@ -48,6 +48,8 @@ struct omap_usb_config { u32 (*usb2_init)(unsigned nwires, unsigned alt_pingroup); int (*ocpi_enable)(void); + + void (*lb_reset)(void); }; #endif /* __LINUX_USB_OMAP1_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 4d244e295e85..b4381f255a5c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,6 +77,27 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> +/** + * interrupt_context_level - return interrupt context level + * + * Returns the current interrupt context level. + * 0 - normal context + * 1 - softirq context + * 2 - hardirq context + * 3 - NMI context + */ +static __always_inline unsigned char interrupt_context_level(void) +{ + unsigned long pc = preempt_count(); + unsigned char level = 0; + + level += !!(pc & (NMI_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + + return level; +} + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT @@ -122,9 +143,10 @@ * The preempt_count offset after spin_lock() */ #if !defined(CONFIG_PREEMPT_RT) -#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET #else -#define PREEMPT_LOCK_OFFSET 0 +/* Locks on RT do not disable preemption */ +#define PREEMPT_LOCK_OFFSET 0 #endif /* diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index c0475d1c9885..81cad9e1e412 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -#if IS_ENABLED(CONFIG_QCOM_SCM) extern bool qcom_scm_is_available(void); extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); @@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); -#else - -#include <linux/errno.h> - -static inline bool qcom_scm_is_available(void) { return false; } - -static inline int qcom_scm_set_cold_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline int qcom_scm_set_warm_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline void qcom_scm_cpu_power_down(u32 flags) {} -static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) - { return -ENODEV; } - -static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size) { return -ENODEV; } -static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size) { return -ENODEV; } -static inline int qcom_scm_pas_auth_and_reset(u32 peripheral) - { return -ENODEV; } -static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } -static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; } - -static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) - { return -ENODEV; } -static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) - { return -ENODEV; } - -static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; } -static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) - { return -ENODEV; } -extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size) - { return -ENODEV; } -static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt) { return -ENODEV; } - -static inline bool qcom_scm_ocmem_lock_available(void) { return false; } -static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode) { return -ENODEV; } -static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, - u32 offset, u32 size) { return -ENODEV; } - -static inline bool qcom_scm_ice_available(void) { return false; } -static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; } -static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size) { return -ENODEV; } - -static inline bool qcom_scm_hdcp_available(void) { return false; } -static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp) { return -ENODEV; } - -static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) - { return -ENODEV; } - -static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version) - { return -ENODEV; } - -static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; } - -static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; } -#endif #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 434d12fe2d4f..5e0beb5c5659 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -71,7 +71,8 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { preempt_enable(); - rcu_read_unlock_strict(); + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) + rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 86c8f6c98412..6f9c35817398 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -31,7 +31,7 @@ static inline int rcu_read_lock_trace_held(void) #ifdef CONFIG_TASKS_TRACE_RCU -void rcu_read_unlock_trace_special(struct task_struct *t, int nesting); +void rcu_read_unlock_trace_special(struct task_struct *t); /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section @@ -80,7 +80,8 @@ static inline void rcu_read_unlock_trace(void) WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. } - rcu_read_unlock_trace_special(t, nesting); + WARN_ON_ONCE(nesting != 0); + rcu_read_unlock_trace_special(t); } void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); diff --git a/include/linux/regulator/lp872x.h b/include/linux/regulator/lp872x.h index d780dbb8b423..b62e45aa1dd3 100644 --- a/include/linux/regulator/lp872x.h +++ b/include/linux/regulator/lp872x.h @@ -10,7 +10,7 @@ #include <linux/regulator/machine.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #define LP872X_MAX_REGULATORS 9 @@ -40,11 +40,6 @@ enum lp872x_regulator_id { LP872X_ID_MAX, }; -enum lp872x_dvs_state { - DVS_LOW = GPIOF_OUT_INIT_LOW, - DVS_HIGH = GPIOF_OUT_INIT_HIGH, -}; - enum lp872x_dvs_sel { SEL_V1, SEL_V2, @@ -52,14 +47,14 @@ enum lp872x_dvs_sel { /** * lp872x_dvs - * @gpio : gpio pin number for dvs control + * @gpio : gpio descriptor for dvs control * @vsel : dvs selector for buck v1 or buck v2 register * @init_state : initial dvs pin state */ struct lp872x_dvs { - int gpio; + struct gpio_desc *gpio; enum lp872x_dvs_sel vsel; - enum lp872x_dvs_state init_state; + enum gpiod_flags init_state; }; /** @@ -78,14 +73,14 @@ struct lp872x_regulator_data { * @update_config : if LP872X_GENERAL_CFG register is updated, set true * @regulator_data : platform regulator id and init data * @dvs : dvs data for buck voltage control - * @enable_gpio : gpio pin number for enable control + * @enable_gpio : gpio descriptor for enable control */ struct lp872x_platform_data { u8 general_config; bool update_config; struct lp872x_regulator_data regulator_data[LP872X_MAX_REGULATORS]; struct lp872x_dvs *dvs; - int enable_gpio; + struct gpio_desc *enable_gpio; }; #endif diff --git a/include/linux/regulator/tps62360.h b/include/linux/regulator/tps62360.h index 94a90c06f1e5..398e74a1d941 100644 --- a/include/linux/regulator/tps62360.h +++ b/include/linux/regulator/tps62360.h @@ -19,10 +19,6 @@ * @en_discharge: Enable discharge the output capacitor via internal * register. * @en_internal_pulldn: internal pull down enable or not. - * @vsel0_gpio: Gpio number for vsel0. It should be -1 if this is tied with - * fixed logic. - * @vsel1_gpio: Gpio number for vsel1. It should be -1 if this is tied with - * fixed logic. * @vsel0_def_state: Default state of vsel0. 1 if it is high else 0. * @vsel1_def_state: Default state of vsel1. 1 if it is high else 0. */ @@ -30,8 +26,6 @@ struct tps62360_regulator_platform_data { struct regulator_init_data *reg_init_data; bool en_discharge; bool en_internal_pulldn; - int vsel0_gpio; - int vsel1_gpio; int vsel0_def_state; int vsel1_def_state; }; diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de257..e704b1a4c06c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); * * returns the number of cleaned PTEs. */ -int page_mkclean(struct page *); +int folio_mkclean(struct folio *); /* * called in munlock()/munmap() path to check for other vmas holding @@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags) { } -static inline int page_mkclean(struct page *page) +static inline int folio_mkclean(struct folio *folio) { return 0; } - - #endif /* CONFIG_MMU */ +static inline int page_mkclean(struct page *page) +{ + return folio_mkclean(page_folio(page)); +} #endif /* _LINUX_RMAP_H */ diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 7ce9a51ae5c0..2c0ad417ce3c 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -30,31 +30,16 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock); -#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock) extern int do_raw_read_trylock(rwlock_t *lock); extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock); extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); -#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock) extern int do_raw_write_trylock(rwlock_t *lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else - -#ifndef arch_read_lock_flags -# define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#endif - -#ifndef arch_write_lock_flags -# define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#endif - # define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_read_lock_flags(lock, flags) \ - do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) # define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_write_lock_flags(lock, flags) \ - do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index abfb53ab11be..f1db6f17c4fb 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -157,8 +157,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock, - do_raw_read_lock_flags, &flags); + LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock); return flags; } @@ -184,8 +183,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock, - do_raw_write_lock_flags, &flags); + LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); return flags; } diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 2713e689ad66..4a6ff274335a 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -427,6 +427,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** + * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits + * @sbq: Bitmap queue to allocate from. + * @nr_tags: number of tags requested + * @offset: offset to add to returned bits + * + * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is + * a bit in the mask returned, and the caller must add @offset to the value to + * get the absolute tag value. + */ +unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, + unsigned int *offset); + +/** * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. @@ -515,6 +528,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); +/** + * sbitmap_queue_clear_batch() - Free a batch of allocated bits + * &struct sbitmap_queue. + * @sbq: Bitmap to free from. + * @offset: offset for each tag in array + * @tags: array of tags + * @nr_tags: number of tags in array + */ +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, + int *tags, int nr_tags); + static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); diff --git a/include/linux/sched.h b/include/linux/sched.h index e12b524426b0..6f6f8f340a0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -503,6 +503,8 @@ struct sched_statistics { u64 block_start; u64 block_max; + s64 sum_block_runtime; + u64 exec_max; u64 slice_max; @@ -522,7 +524,7 @@ struct sched_statistics { u64 nr_wakeups_passive; u64 nr_wakeups_idle; #endif -}; +} ____cacheline_aligned; struct sched_entity { /* For load-balancing: */ @@ -538,8 +540,6 @@ struct sched_entity { u64 nr_migrations; - struct sched_statistics statistics; - #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; @@ -750,10 +750,6 @@ struct task_struct { #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; -#ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -775,10 +771,10 @@ struct task_struct { int normal_prio; unsigned int rt_priority; - const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; + const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; @@ -803,6 +799,8 @@ struct task_struct { struct uclamp_se uclamp[UCLAMP_CNT]; #endif + struct sched_statistics stats; + #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; @@ -1160,10 +1158,8 @@ struct task_struct { /* Stacked block device info: */ struct bio_list *bio_list; -#ifdef CONFIG_BLOCK /* Stack plugging: */ struct blk_plug *plug; -#endif /* VM state: */ struct reclaim_state *reclaim_state; @@ -1471,6 +1467,7 @@ struct task_struct { mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; + int mce_count; #endif #ifdef CONFIG_KRETPROBES @@ -1719,7 +1716,7 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -static inline bool is_percpu_thread(void) +static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && @@ -1885,10 +1882,7 @@ extern struct thread_info init_thread_info; extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} +# define task_thread_info(task) (&(task)->thread_info) #elif !defined(__HAVE_THREAD_FUNCTIONS) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif @@ -2038,7 +2032,7 @@ static inline int _cond_resched(void) { return 0; } #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */ #define cond_resched() ({ \ - ___might_sleep(__FILE__, __LINE__, 0); \ + __might_resched(__FILE__, __LINE__, 0); \ _cond_resched(); \ }) @@ -2046,19 +2040,38 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); -#define cond_resched_lock(lock) ({ \ - ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ - __cond_resched_lock(lock); \ +#define MIGHT_RESCHED_RCU_SHIFT 8 +#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) + +#ifndef CONFIG_PREEMPT_RT +/* + * Non RT kernels have an elevated preempt count due to the held lock, + * but are not allowed to be inside a RCU read side critical section + */ +# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET +#else +/* + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in + * cond_resched*lock() has to take that into account because it checks for + * preempt_count() and rcu_preempt_depth(). + */ +# define PREEMPT_LOCK_RESCHED_OFFSETS \ + (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) +#endif + +#define cond_resched_lock(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_lock(lock); \ }) -#define cond_resched_rwlock_read(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_read(lock); \ +#define cond_resched_rwlock_read(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_rwlock_read(lock); \ }) -#define cond_resched_rwlock_write(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_write(lock); \ +#define cond_resched_rwlock_write(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_rwlock_write(lock); \ }) static inline void cond_resched_rcu(void) @@ -2113,11 +2126,7 @@ static __always_inline bool need_resched(void) static inline unsigned int task_cpu(const struct task_struct *p) { -#ifdef CONFIG_THREAD_INFO_IN_TASK - return READ_ONCE(p->cpu); -#else return READ_ONCE(task_thread_info(p)->cpu); -#endif } extern void set_task_cpu(struct task_struct *p, unsigned int cpu); @@ -2136,6 +2145,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ extern bool sched_task_on_rq(struct task_struct *p); +extern unsigned long get_wchan(struct task_struct *p); /* * In order to reduce various lock holder preemption latencies provide an diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 22873d276be6..d73d314d59c6 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -11,7 +11,11 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +#ifdef CONFIG_SMP extern void wake_up_if_idle(int cpu); +#else +static inline void wake_up_if_idle(int cpu) { } +#endif /* * Idle thread specific functions to determine the need_resched diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 5561486fddef..aca874d33fe6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +#ifdef CONFIG_PREEMPT_RT +/* + * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is + * by far the least expensive way to do that. + */ +static inline void __mmdrop_delayed(struct rcu_head *rhp) +{ + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); + + __mmdrop(mm); +} + +/* + * Invoked from finish_task_switch(). Delegates the heavy lifting on RT + * kernels via RCU. + */ +static inline void mmdrop_sched(struct mm_struct *mm) +{ + /* Provides a full memory barrier. See mmdrop() */ + if (atomic_dec_and_test(&mm->mm_count)) + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else +static inline void mmdrop_sched(struct mm_struct *mm) +{ + mmdrop(mm); +} +#endif + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ef02be869cf2..ba88a6987400 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,7 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p); +extern void sched_post_fork(struct task_struct *p, + struct kernel_clone_args *kargs); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427e439c..d10150587d81 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8f0f778b7c91..c07bfa2d80f2 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -42,6 +42,13 @@ static inline int cpu_smt_flags(void) } #endif +#ifdef CONFIG_SCHED_CLUSTER +static inline int cpu_cluster_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { @@ -98,7 +105,7 @@ struct sched_domain { /* idle_balance() stats */ u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; + unsigned long last_decay_max_lb_cost; u64 avg_scan_cost; /* select_idle_sibling */ diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 21c3771e6a56..988528b5da43 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page) mapping = (struct address_space *) ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); - if (mapping != page->mapping) + if (!mapping || mapping != page->mapping) return false; return mapping->a_ops == &secretmem_aops; diff --git a/include/linux/security.h b/include/linux/security.h index 5b7288521300..7e0ba63b5dde 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -258,13 +258,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -317,8 +317,9 @@ int security_add_mnt_opt(const char *option, const char *val, int len, void **mnt_opts); int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen); + const struct qstr *name, + const char **xattr_name, void **ctx, + u32 *ctxlen); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -508,25 +509,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -739,6 +740,7 @@ static inline void security_inode_free(struct inode *inode) static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, + const char **xattr_name, void **ctx, u32 *ctxlen) { @@ -1041,6 +1043,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; @@ -2038,4 +2045,20 @@ static inline int security_perf_event_write(struct perf_event *event) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_PERF_EVENTS */ +#ifdef CONFIG_IO_URING +#ifdef CONFIG_SECURITY +extern int security_uring_override_creds(const struct cred *new); +extern int security_uring_sqpoll(void); +#else +static inline int security_uring_override_creds(const struct cred *new) +{ + return 0; +} +static inline int security_uring_sqpoll(void) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_IO_URING */ + #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 3f96a6374e4f..7d34105e20c6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -464,6 +464,12 @@ int __save_altstack(stack_t __user *, unsigned long); unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ } while (0); +#ifdef CONFIG_DYNAMIC_SIGFRAME +bool sigaltstack_size_valid(size_t ss_size); +#else +static inline bool sigaltstack_size_valid(size_t size) { return true; } +#endif /* !CONFIG_DYNAMIC_SIGFRAME */ + #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bdb0db3e825..841e2f0f5240 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk, WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); WRITE_ONCE(prev->next, newsk); - list->qlen++; + WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc924..1ce9a9eb223b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8371bca13729..eb7ac8a1e03c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -78,10 +78,6 @@ struct spi_statistics { unsigned long transfers_split_maxsize; }; -void spi_statistics_add_transfer_stats(struct spi_statistics *stats, - struct spi_transfer *xfer, - struct spi_controller *ctlr); - #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ do { \ unsigned long flags; \ @@ -531,6 +527,9 @@ struct spi_controller { /* I/O mutex */ struct mutex io_mutex; + /* Used to avoid adding the same CS twice */ + struct mutex add_lock; + /* lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; @@ -760,8 +759,6 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -extern struct spi_controller *spi_busnum_to_master(u16 busnum); - /* * SPI resource management while processing a SPI message */ @@ -785,15 +782,6 @@ struct spi_res { unsigned long long data[]; /* guarantee ull alignment */ }; -extern void *spi_res_alloc(struct spi_device *spi, - spi_res_release_t release, - size_t size, gfp_t gfp); -extern void spi_res_add(struct spi_message *message, void *res); -extern void spi_res_free(void *res); - -extern void spi_res_release(struct spi_controller *ctlr, - struct spi_message *message); - /*---------------------------------------------------------------------------*/ /* @@ -1111,8 +1099,6 @@ static inline void spi_message_free(struct spi_message *m) extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -extern int spi_async_locked(struct spi_device *spi, - struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); static inline size_t @@ -1195,15 +1181,6 @@ struct spi_replaced_transfers { struct spi_transfer inserted_transfers[]; }; -extern struct spi_replaced_transfers *spi_replace_transfers( - struct spi_message *msg, - struct spi_transfer *xfer_first, - size_t remove, - size_t insert, - spi_replaced_release_t release, - size_t extradatasize, - gfp_t gfp); - /*---------------------------------------------------------------------------*/ /* SPI transfer transformation methods */ @@ -1475,20 +1452,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). - * - * You can also use spi_alloc_device() and spi_add_device() to use a two - * stage registration sequence for each spi_device. This gives the caller - * some more control over the spi_device structure before it is registered, - * but requires that caller to initialize fields that would otherwise - * be defined using the board info. */ extern struct spi_device * -spi_alloc_device(struct spi_controller *ctlr); - -extern int -spi_add_device(struct spi_device *spi); - -extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); @@ -1502,23 +1467,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } -/* OF support code */ -#if IS_ENABLED(CONFIG_OF) - -/* must call put_device() when done with returned spi_device device */ -extern struct spi_device * -of_find_spi_device_by_node(struct device_node *node); - -#else - -static inline struct spi_device * -of_find_spi_device_by_node(struct device_node *node) -{ - return NULL; -} - -#endif /* IS_ENABLED(CONFIG_OF) */ - /* Compatibility layer */ #define spi_master spi_controller diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 45310ea1b1d7..f0447062eecd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -177,7 +177,6 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); -#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) extern int do_raw_spin_trylock(raw_spinlock_t *lock); extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else @@ -188,18 +187,6 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) mmiowb_spin_lock(); } -#ifndef arch_spin_lock_flags -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#endif - -static inline void -do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock) -{ - __acquire(lock); - arch_spin_lock_flags(&lock->raw_lock, *flags); - mmiowb_spin_lock(); -} - static inline int do_raw_spin_trylock(raw_spinlock_t *lock) { int ret = arch_spin_trylock(&(lock)->raw_lock); diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 6b8e1a0b137b..51fa0dab68c4 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -108,16 +108,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) local_irq_save(flags); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - /* - * On lockdep we dont want the hand-coded irq-enable of - * do_raw_spin_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#ifdef CONFIG_LOCKDEP LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -#else - do_raw_spin_lock_flags(lock, &flags); -#endif return flags; } diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 0ac9112c1bbe..16521074b6f7 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -62,7 +62,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) -# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) # define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 998a4ba28eba..ca507bd5f808 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -20,7 +20,7 @@ enum { #endif /** - * sizeof_field(TYPE, MEMBER) + * sizeof_field() - Report the size of a struct field in bytes * * @TYPE: The structure containing the field of interest * @MEMBER: The field to return the size of @@ -28,7 +28,7 @@ enum { #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) /** - * offsetofend(TYPE, MEMBER) + * offsetofend() - Report the offset of a struct field within the struct * * @TYPE: The type of the structure * @MEMBER: The member within the structure to get the end offset of @@ -36,4 +36,65 @@ enum { #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +/** + * struct_group() - Wrap a set of declarations in a mirrored struct + * + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. + */ +#define struct_group(NAME, MEMBERS...) \ + __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) + +/** + * struct_group_attr() - Create a struct_group() with trailing attributes + * + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes to apply + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes structure attributes argument. + */ +#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ + __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) + +/** + * struct_group_tagged() - Create a struct_group with a reusable tag + * + * @TAG: The tag name for the named sub-struct + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes struct tag argument for the named copy, + * so the specified layout can be reused later. + */ +#define struct_group_tagged(TAG, NAME, MEMBERS...) \ + __struct_group(TAG, NAME, /* no attrs */, MEMBERS) + +/** + * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ + __DECLARE_FLEX_ARRAY(TYPE, NAME) + #endif diff --git a/include/linux/string.h b/include/linux/string.h index 5e96d656be7a..5a36608144a9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -249,15 +249,6 @@ static inline const char *kbasename(const char *path) return tail ? tail + 1 : path; } -#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) -#define __RENAME(x) __asm__(#x) - -void fortify_panic(const char *name) __noreturn __cold; -void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); -void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); -void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); -void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); - #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) #include <linux/fortify-string.h> #endif @@ -281,6 +272,41 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len, } /** + * memset_after - Set a value after a struct member to the end of a struct + * + * @obj: Address of target struct instance + * @v: Byte value to repeatedly write + * @member: after which struct member to start writing bytes + * + * This is good for clearing padding following the given member. + */ +#define memset_after(obj, v, member) \ +({ \ + u8 *__ptr = (u8 *)(obj); \ + typeof(v) __val = (v); \ + memset(__ptr + offsetofend(typeof(*(obj)), member), __val, \ + sizeof(*(obj)) - offsetofend(typeof(*(obj)), member)); \ +}) + +/** + * memset_startat - Set a value starting at a member to the end of a struct + * + * @obj: Address of target struct instance + * @v: Byte value to repeatedly write + * @member: struct member to start writing at + * + * Note that if there is padding between the prior member and the target + * member, memset_after() should be used to clear the prior padding. + */ +#define memset_startat(obj, v, member) \ +({ \ + u8 *__ptr = (u8 *)(obj); \ + typeof(v) __val = (v); \ + memset(__ptr + offsetof(typeof(*(obj)), member), __val, \ + sizeof(*(obj)) - offsetof(typeof(*(obj)), member)); \ +}) + +/** * str_has_prefix - Test if a string has a given prefix * @str: The string to test * @prefix: The string to see if @str starts with diff --git a/include/linux/swap.h b/include/linux/swap.h index ba52f3a3478e..cdf0957a88a4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,11 +320,17 @@ struct vma_swap_readahead { #endif }; +static inline swp_entry_t folio_swap_entry(struct folio *folio) +{ + swp_entry_t entry = { .val = page_private(&folio->page) }; + return entry; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); -void workingset_refault(struct page *page, void *shadow); -void workingset_activation(struct page *page); +void workingset_refault(struct folio *folio, void *shadow); +void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); @@ -344,9 +350,11 @@ extern unsigned long nr_free_buffer_pages(void); /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -extern void lru_note_cost_page(struct page *); +extern void lru_note_cost_folio(struct folio *); +extern void folio_add_lru(struct folio *); extern void lru_cache_add(struct page *); -extern void mark_page_accessed(struct page *); +void mark_page_accessed(struct page *); +void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; @@ -365,7 +373,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 252243c7783d..528a478dbda8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -58,6 +58,7 @@ struct mq_attr; struct compat_stat; struct old_timeval32; struct robust_list_head; +struct futex_waitv; struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; @@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid, asmlinkage long sys_set_tid_address(int __user *tidptr); asmlinkage long sys_unshare(unsigned long unshare_flags); -/* kernel/futex.c */ +/* kernel/futex/syscalls.c */ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, const struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); @@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); +asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, + unsigned int nr_futexes, unsigned int flags, + struct __kernel_timespec __user *timeout, clockid_t clockid); + /* kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 96305a64a5a7..c635c2e014e3 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -3,7 +3,7 @@ #define _LINUX_T10_PI_H #include <linux/types.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> /* * A T10 PI-capable target device can be formatted with different diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 0999f6317978..ad0c4e041030 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -203,7 +203,7 @@ static inline void copy_overflow(int size, unsigned long count) static __always_inline __must_check bool check_copy_size(const void *addr, size_t bytes, bool is_source) { - int sz = __compiletime_object_size(addr); + int sz = __builtin_object_size(addr, 0); if (unlikely(sz >= 0 && sz < bytes)) { if (!__builtin_constant_p(bytes)) copy_overflow(sz, bytes); diff --git a/include/linux/topology.h b/include/linux/topology.h index 7634cd737061..0b3704ad13c8 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -186,6 +186,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_die_id #define topology_die_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_cluster_id +#define topology_cluster_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif @@ -195,6 +198,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_cluster_cpumask +#define topology_cluster_cpumask(cpu) cpumask_of(cpu) +#endif #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif @@ -206,6 +212,13 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask) +static inline const struct cpumask *cpu_cluster_mask(int cpu) +{ + return topology_cluster_cpumask(cpu); +} +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); diff --git a/include/linux/torture.h b/include/linux/torture.h index 0910c5803f35..24f58e50a94b 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -47,6 +47,14 @@ do { \ } while (0) void verbose_torout_sleep(void); +#define torture_init_error(firsterr) \ +({ \ + int ___firsterr = (firsterr); \ + \ + WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \ + ___firsterr < 0; \ +}) + /* Definitions for online/offline exerciser. */ #ifdef CONFIG_HOTPLUG_CPU int torture_num_online_cpus(void); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index aa11fe323c56..12d827734686 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -269,6 +269,7 @@ enum tpm2_cc_attrs { #define TPM_VID_INTEL 0x8086 #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A +#define TPM_VID_ATML 0x1114 enum tpm_chip_flags { TPM_CHIP_FLAG_TPM2 = BIT(1), diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3e475eeb5a99..50453b287615 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -671,7 +671,7 @@ struct trace_event_file { } \ early_initcall(trace_init_perf_perm_##name); -#define PERF_MAX_TRACE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 8192 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c5714e65..c303f7a114e9 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -16,23 +16,8 @@ * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... - * If callback is registered to the "global" list, the list - * function is called and recursion checks the GLOBAL bits. - * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. - * - * Now if the arch does not support a feature, and it calls - * the global list function which calls the ftrace callback - * all three of these steps will do a recursion protection. - * There's no reason to do one if the previous caller already - * did. The recursion that we are protecting against will - * go through the same steps again. - * - * To prevent the multiple recursion checks, if a recursion - * bit is set that is higher than the MAX bit of the current - * check, then we know that the check was made by the previous - * caller, and we can skip the current check. */ enum { /* Function recursion bits */ @@ -40,12 +25,14 @@ enum { TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, - /* INTERNAL_BITs must be greater than FTRACE_BITs */ + /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* @@ -86,12 +73,6 @@ enum { */ TRACE_GRAPH_NOTRACE_BIT, - /* - * When transitioning between context, the preempt_count() may - * not be correct. Allow for a single recursion to cover this case. - */ - TRACE_TRANSITION_BIT, - /* Used to prevent recursion recording from recursing. */ TRACE_RECORD_RECURSION_BIT, }; @@ -113,12 +94,10 @@ enum { #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) #define TRACE_LIST_START TRACE_INTERNAL_BIT -#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_CONTEXT_MASK TRACE_LIST_MAX +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) /* * Used for setting context @@ -132,17 +111,14 @@ enum { TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) { - unsigned long pc = preempt_count(); + unsigned char bit = interrupt_context_level(); - if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) - return TRACE_CTX_NORMAL; - else - return pc & NMI_MASK ? TRACE_CTX_NMI : - pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; + return TRACE_CTX_NORMAL - bit; } #ifdef CONFIG_FTRACE_RECORD_RECURSION @@ -159,46 +135,48 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +/* + * Preemption is promised to be disabled when return bit >= 0. + */ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, - int start, int max) + int start) { unsigned int val = READ_ONCE(current->trace_recursion); int bit; - /* A previous recursion check was made */ - if ((val & TRACE_CONTEXT_MASK) > max) - return 0; - bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* - * It could be that preempt_count has not been updated during - * a switch between contexts. Allow for a single recursion. + * If an interrupt occurs during a trace, and another trace + * happens in that interrupt but before the preempt_count is + * updated to reflect the new interrupt context, then this + * will think a recursion occurred, and the event will be dropped. + * Let a single instance happen via the TRANSITION_BIT to + * not drop those events. */ - bit = TRACE_TRANSITION_BIT; + bit = TRACE_CTX_TRANSITION + start; if (val & (1 << bit)) { do_ftrace_record_recursion(ip, pip); return -1; } - } else { - /* Normal check passed, clear the transition to allow it again */ - val &= ~(1 << TRACE_TRANSITION_BIT); } val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit + 1; + preempt_disable_notrace(); + + return bit; } +/* + * Preemption will be enabled (if it was previously enabled). + */ static __always_inline void trace_clear_recursion(int bit) { - if (!bit) - return; - + preempt_enable_notrace(); barrier(); - bit--; trace_recursion_clear(bit); } @@ -209,12 +187,12 @@ static __always_inline void trace_clear_recursion(int bit) * tracing recursed in the same context (normal vs interrupt), * * Returns: -1 if a recursion happened. - * >= 0 if no recursion + * >= 0 if no recursion. */ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) { - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START); } /** diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3e80c4bc66f7..2564b7434b4d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) mem_cgroup_handle_over_high(); blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); } /* diff --git a/include/linux/uio.h b/include/linux/uio.h index 5265024e8b90..207101a9c5c3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -27,6 +27,12 @@ enum iter_type { ITER_DISCARD, }; +struct iov_iter_state { + size_t iov_offset; + size_t count; + unsigned long nr_segs; +}; + struct iov_iter { u8 iter_type; bool data_source; @@ -47,7 +53,6 @@ struct iov_iter { }; loff_t xarray_start; }; - size_t truncated; }; static inline enum iter_type iov_iter_type(const struct iov_iter *i) @@ -55,6 +60,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i) return i->iter_type; } +static inline void iov_iter_save_state(struct iov_iter *iter, + struct iov_iter_state *state) +{ + state->iov_offset = iter->iov_offset; + state->count = iter->count; + state->nr_segs = iter->nr_segs; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -233,6 +246,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); @@ -255,10 +269,8 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) * conversion in assignement is by definition greater than all * values of size_t, including old i->count. */ - if (i->count > count) { - i->truncated += i->count - count; + if (i->count > count) i->count = count; - } } /* @@ -267,7 +279,6 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) */ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { - i->truncated -= count - i->count; i->count = count; } diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 548a028f2dab..2c1fc9212cf2 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,6 +124,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -134,6 +135,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index eb70cabe6e7f..33a4240e6a6f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); +void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); static inline void set_rlimit_ucount_max(struct user_namespace *ns, diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d6a6cf53b127..bfe38869498d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ +static inline void __zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + __mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void __zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void __zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void __node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + __mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void __node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void __node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + +static inline void node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, int migratetype) { @@ -525,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - static inline void __inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -543,6 +609,24 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +static inline void __lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + __mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void __lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void __lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} + static inline void inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -555,4 +639,21 @@ static inline void dec_lruvec_page_state(struct page *page, mod_lruvec_page_state(page, idx, -1); } +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} #endif /* _LINUX_VMSTAT_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 93dab0e9580f..2d0df57c9902 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1160,6 +1160,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i (wait)->flags = 0; \ } while (0) -bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg); +typedef int (*task_call_f)(struct task_struct *p, void *arg); +extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2ebef6b1a3d6..74d3c1efd9bb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -struct workqueue_struct *alloc_workqueue(const char *fmt, - unsigned int flags, - int max_active, ...); +__printf(1, 4) struct workqueue_struct * +alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); /** * alloc_ordered_workqueue - allocate an ordered workqueue diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d1f65adf6a26..3bfd487d1dd2 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,7 +11,6 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> -#include <linux/blk-cgroup.h> struct bio; @@ -109,15 +108,12 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc) return flags; } -static inline struct cgroup_subsys_state * -wbc_blkcg_css(struct writeback_control *wbc) -{ #ifdef CONFIG_CGROUP_WRITEBACK - if (wbc->wb) - return wbc->wb->blkcg_css; -#endif - return blkcg_root_css; -} +#define wbc_blkcg_css(wbc) \ + ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css) +#else +#define wbc_blkcg_css(wbc) (blkcg_root_css) +#endif /* CONFIG_CGROUP_WRITEBACK */ /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to @@ -393,7 +389,14 @@ void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); -void account_page_redirty(struct page *page); +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); +void folio_account_redirty(struct folio *folio); +static inline void account_page_redirty(struct page *page) +{ + folio_account_redirty(page_folio(page)); +} +bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); +bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 29db736af86d..bb763085479a 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -28,12 +28,10 @@ #ifndef CONFIG_PREEMPT_RT #define WW_MUTEX_BASE mutex #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) mutex_trylock(l) #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) #else #define WW_MUTEX_BASE rt_mutex #define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) rt_mutex_trylock(l) #define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) #endif @@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, extern void ww_mutex_unlock(struct ww_mutex *lock); -/** - * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context - * @lock: mutex to lock - * - * Trylocks a mutex without acquire context, so no deadlock detection is - * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. - */ -static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) -{ - return ww_mutex_base_trylock(&lock->base); -} +extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable diff --git a/include/linux/xz.h b/include/linux/xz.h index 9884c8440188..7285ca5d56e9 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -234,6 +234,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); XZ_EXTERN void xz_dec_end(struct xz_dec *s); /* + * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. + * See xz_dec_microlzma_alloc() below for details. + * + * These functions aren't used or available in preboot code and thus aren't + * marked with XZ_EXTERN. This avoids warnings about static functions that + * are never defined. + */ +/** + * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state + */ +struct xz_dec_microlzma; + +/** + * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder + * @mode XZ_SINGLE or XZ_PREALLOC + * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * at most 3 GiB. + * + * In contrast to xz_dec_init(), this function only allocates the memory + * and remembers the dictionary size. xz_dec_microlzma_reset() must be used + * before calling xz_dec_microlzma_run(). + * + * The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE. + * With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated. + * + * On success, xz_dec_microlzma_alloc() returns a pointer to + * struct xz_dec_microlzma. If memory allocation fails or + * dict_size is invalid, NULL is returned. + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. + */ +extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); + +/** + * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state + * @s Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size Compressed size of the input stream + * @uncomp_size Uncompressed size of the input stream. A value smaller + * than the real uncompressed size of the input stream can + * be specified if uncomp_size_is_exact is set to false. + * uncomp_size can never be set to a value larger than the + * expected real uncompressed size because it would eventually + * result in XZ_DATA_ERROR. + * @uncomp_size_is_exact This is an int instead of bool to avoid + * requiring stdbool.h. This should normally be set to true. + * When this is set to false, error detection is weaker. + */ +extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, + uint32_t comp_size, uint32_t uncomp_size, + int uncomp_size_is_exact); + +/** + * xz_dec_microlzma_run() - Run the MicroLZMA decoder + * @s Decoder state initialized using xz_dec_microlzma_reset() + * @b: Input and output buffers + * + * This works similarly to xz_dec_run() with a few important differences. + * Only the differences are documented here. + * + * The only possible return values are XZ_OK, XZ_STREAM_END, and + * XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress + * is possible due to lack of input data or output space, this function will + * keep returning XZ_OK. Thus, the calling code must be written so that it + * will eventually provide input and output space matching (or exceeding) + * comp_size and uncomp_size arguments given to xz_dec_microlzma_reset(). + * If the caller cannot do this (for example, if the input file is truncated + * or otherwise corrupt), the caller must detect this error by itself to + * avoid an infinite loop. + * + * If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned. + * This can happen also when incorrect dictionary, uncompressed, or + * compressed sizes have been specified. + * + * With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over + * uncompressed data. This way the caller doesn't need to provide a temporary + * output buffer for the bytes that will be ignored. + * + * With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK + * is also possible and thus XZ_SINGLE is actually a limited multi-call mode. + * After XZ_OK the bytes decoded so far may be read from the output buffer. + * It is possible to continue decoding but the variables b->out and b->out_pos + * MUST NOT be changed by the caller. Increasing the value of b->out_size is + * allowed to make more output space available; one doesn't need to provide + * space for the whole uncompressed data on the first call. The input buffer + * may be changed normally like with XZ_PREALLOC. This way input data can be + * provided from non-contiguous memory. + */ +extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, + struct xz_buf *b); + +/** + * xz_dec_microlzma_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_microlzma_alloc(). + * If s is NULL, this function does nothing. + */ +extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); + +/* * Standalone build (userspace build or in-kernel build for boot time use) * needs a CRC32 implementation. For normal in-kernel use, kernel's own * CRC32 module is used instead, and users of this module don't need to diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h index 781371bff2ad..ef63bc205756 100644 --- a/include/media/hevc-ctrls.h +++ b/include/media/hevc-ctrls.h @@ -19,6 +19,7 @@ #define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) #define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) #define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) +#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) #define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) #define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) #define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) @@ -27,6 +28,7 @@ #define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 #define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 #define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 #define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 enum v4l2_mpeg_video_hevc_decode_mode { @@ -225,6 +227,15 @@ struct v4l2_ctrl_hevc_decode_params { __u64 flags; }; +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + /* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ #define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) /* diff --git a/include/media/i2c/mt9p031.h b/include/media/i2c/mt9p031.h index 7c29c53aa988..f933cd0be8e5 100644 --- a/include/media/i2c/mt9p031.h +++ b/include/media/i2c/mt9p031.h @@ -10,6 +10,7 @@ struct v4l2_subdev; * @target_freq: Pixel clock frequency */ struct mt9p031_platform_data { + unsigned int pixclk_pol:1; int ext_freq; int target_freq; }; diff --git a/include/media/tuner.h b/include/media/tuner.h index ff85d7227f91..a7796e0a3659 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -132,6 +132,7 @@ #define TUNER_SONY_BTF_PG472Z 89 /* PAL+SECAM */ #define TUNER_SONY_BTF_PK467Z 90 /* NTSC_JP */ #define TUNER_SONY_BTF_PB463Z 91 /* NTSC */ +#define TUNER_SI2157 92 /* tv card specific */ #define TDA9887_PRESENT (1<<0) diff --git a/include/media/v4l2-async.h b/include/media/v4l2-async.h index fa4901162663..13ff3ad948f4 100644 --- a/include/media/v4l2-async.h +++ b/include/media/v4l2-async.h @@ -123,45 +123,45 @@ struct v4l2_async_notifier { void v4l2_async_debug_init(struct dentry *debugfs_dir); /** - * v4l2_async_notifier_init - Initialize a notifier. + * v4l2_async_nf_init - Initialize a notifier. * * @notifier: pointer to &struct v4l2_async_notifier * * This function initializes the notifier @asd_list. It must be called * before adding a subdevice to a notifier, using one of: - * v4l2_async_notifier_add_fwnode_remote_subdev(), - * v4l2_async_notifier_add_fwnode_subdev(), - * v4l2_async_notifier_add_i2c_subdev(), - * __v4l2_async_notifier_add_subdev() or - * v4l2_async_notifier_parse_fwnode_endpoints(). + * v4l2_async_nf_add_fwnode_remote(), + * v4l2_async_nf_add_fwnode(), + * v4l2_async_nf_add_i2c(), + * __v4l2_async_nf_add_subdev() or + * v4l2_async_nf_parse_fwnode_endpoints(). */ -void v4l2_async_notifier_init(struct v4l2_async_notifier *notifier); +void v4l2_async_nf_init(struct v4l2_async_notifier *notifier); /** - * __v4l2_async_notifier_add_subdev - Add an async subdev to the + * __v4l2_async_nf_add_subdev - Add an async subdev to the * notifier's master asd list. * * @notifier: pointer to &struct v4l2_async_notifier * @asd: pointer to &struct v4l2_async_subdev * * \warning: Drivers should avoid using this function and instead use one of: - * v4l2_async_notifier_add_fwnode_subdev(), - * v4l2_async_notifier_add_fwnode_remote_subdev() or - * v4l2_async_notifier_add_i2c_subdev(). + * v4l2_async_nf_add_fwnode(), + * v4l2_async_nf_add_fwnode_remote() or + * v4l2_async_nf_add_i2c(). * * Call this function before registering a notifier to link the provided @asd to * the notifiers master @asd_list. The @asd must be allocated with k*alloc() as * it will be freed by the framework when the notifier is destroyed. */ -int __v4l2_async_notifier_add_subdev(struct v4l2_async_notifier *notifier, - struct v4l2_async_subdev *asd); +int __v4l2_async_nf_add_subdev(struct v4l2_async_notifier *notifier, + struct v4l2_async_subdev *asd); struct v4l2_async_subdev * -__v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier, - struct fwnode_handle *fwnode, - unsigned int asd_struct_size); +__v4l2_async_nf_add_fwnode(struct v4l2_async_notifier *notifier, + struct fwnode_handle *fwnode, + unsigned int asd_struct_size); /** - * v4l2_async_notifier_add_fwnode_subdev - Allocate and add a fwnode async + * v4l2_async_nf_add_fwnode - Allocate and add a fwnode async * subdev to the notifier's master asd_list. * * @notifier: pointer to &struct v4l2_async_notifier @@ -175,16 +175,15 @@ __v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier, * notifiers @asd_list. The function also gets a reference of the fwnode which * is released later at notifier cleanup time. */ -#define v4l2_async_notifier_add_fwnode_subdev(notifier, fwnode, type) \ - ((type *)__v4l2_async_notifier_add_fwnode_subdev(notifier, fwnode, \ - sizeof(type))) +#define v4l2_async_nf_add_fwnode(notifier, fwnode, type) \ + ((type *)__v4l2_async_nf_add_fwnode(notifier, fwnode, sizeof(type))) struct v4l2_async_subdev * -__v4l2_async_notifier_add_fwnode_remote_subdev(struct v4l2_async_notifier *notif, - struct fwnode_handle *endpoint, - unsigned int asd_struct_size); +__v4l2_async_nf_add_fwnode_remote(struct v4l2_async_notifier *notif, + struct fwnode_handle *endpoint, + unsigned int asd_struct_size); /** - * v4l2_async_notifier_add_fwnode_remote_subdev - Allocate and add a fwnode + * v4l2_async_nf_add_fwnode_remote - Allocate and add a fwnode * remote async subdev to the * notifier's master asd_list. * @@ -200,20 +199,18 @@ __v4l2_async_notifier_add_fwnode_remote_subdev(struct v4l2_async_notifier *notif * function also gets a reference of the fwnode which is released later at * notifier cleanup time. * - * This is just like v4l2_async_notifier_add_fwnode_subdev(), but with the + * This is just like v4l2_async_nf_add_fwnode(), but with the * exception that the fwnode refers to a local endpoint, not the remote one. */ -#define v4l2_async_notifier_add_fwnode_remote_subdev(notifier, ep, type) \ - ((type *) \ - __v4l2_async_notifier_add_fwnode_remote_subdev(notifier, ep, \ - sizeof(type))) +#define v4l2_async_nf_add_fwnode_remote(notifier, ep, type) \ + ((type *)__v4l2_async_nf_add_fwnode_remote(notifier, ep, sizeof(type))) struct v4l2_async_subdev * -__v4l2_async_notifier_add_i2c_subdev(struct v4l2_async_notifier *notifier, - int adapter_id, unsigned short address, - unsigned int asd_struct_size); +__v4l2_async_nf_add_i2c(struct v4l2_async_notifier *notifier, + int adapter_id, unsigned short address, + unsigned int asd_struct_size); /** - * v4l2_async_notifier_add_i2c_subdev - Allocate and add an i2c async + * v4l2_async_nf_add_i2c - Allocate and add an i2c async * subdev to the notifier's master asd_list. * * @notifier: pointer to &struct v4l2_async_notifier @@ -223,59 +220,59 @@ __v4l2_async_notifier_add_i2c_subdev(struct v4l2_async_notifier *notifier, * v4l2_async_subdev shall be the first member of the driver's async * sub-device struct, i.e. both begin at the same memory address. * - * Same as v4l2_async_notifier_add_fwnode_subdev() but for I2C matched + * Same as v4l2_async_nf_add_fwnode() but for I2C matched * sub-devices. */ -#define v4l2_async_notifier_add_i2c_subdev(notifier, adapter, address, type) \ - ((type *)__v4l2_async_notifier_add_i2c_subdev(notifier, adapter, \ - address, sizeof(type))) +#define v4l2_async_nf_add_i2c(notifier, adapter, address, type) \ + ((type *)__v4l2_async_nf_add_i2c(notifier, adapter, address, \ + sizeof(type))) /** - * v4l2_async_notifier_register - registers a subdevice asynchronous notifier + * v4l2_async_nf_register - registers a subdevice asynchronous notifier * * @v4l2_dev: pointer to &struct v4l2_device * @notifier: pointer to &struct v4l2_async_notifier */ -int v4l2_async_notifier_register(struct v4l2_device *v4l2_dev, - struct v4l2_async_notifier *notifier); +int v4l2_async_nf_register(struct v4l2_device *v4l2_dev, + struct v4l2_async_notifier *notifier); /** - * v4l2_async_subdev_notifier_register - registers a subdevice asynchronous + * v4l2_async_subdev_nf_register - registers a subdevice asynchronous * notifier for a sub-device * * @sd: pointer to &struct v4l2_subdev * @notifier: pointer to &struct v4l2_async_notifier */ -int v4l2_async_subdev_notifier_register(struct v4l2_subdev *sd, - struct v4l2_async_notifier *notifier); +int v4l2_async_subdev_nf_register(struct v4l2_subdev *sd, + struct v4l2_async_notifier *notifier); /** - * v4l2_async_notifier_unregister - unregisters a subdevice + * v4l2_async_nf_unregister - unregisters a subdevice * asynchronous notifier * * @notifier: pointer to &struct v4l2_async_notifier */ -void v4l2_async_notifier_unregister(struct v4l2_async_notifier *notifier); +void v4l2_async_nf_unregister(struct v4l2_async_notifier *notifier); /** - * v4l2_async_notifier_cleanup - clean up notifier resources + * v4l2_async_nf_cleanup - clean up notifier resources * @notifier: the notifier the resources of which are to be cleaned up * * Release memory resources related to a notifier, including the async * sub-devices allocated for the purposes of the notifier but not the notifier * itself. The user is responsible for calling this function to clean up the * notifier after calling - * v4l2_async_notifier_add_fwnode_remote_subdev(), - * v4l2_async_notifier_add_fwnode_subdev(), - * v4l2_async_notifier_add_i2c_subdev(), - * __v4l2_async_notifier_add_subdev() or - * v4l2_async_notifier_parse_fwnode_endpoints(). + * v4l2_async_nf_add_fwnode_remote(), + * v4l2_async_nf_add_fwnode(), + * v4l2_async_nf_add_i2c(), + * __v4l2_async_nf_add_subdev() or + * v4l2_async_nf_parse_fwnode_endpoints(). * - * There is no harm from calling v4l2_async_notifier_cleanup() in other + * There is no harm from calling v4l2_async_nf_cleanup() in other * cases as long as its memory has been zeroed after it has been * allocated. */ -void v4l2_async_notifier_cleanup(struct v4l2_async_notifier *notifier); +void v4l2_async_nf_cleanup(struct v4l2_async_notifier *notifier); /** * v4l2_async_register_subdev - registers a sub-device to the asynchronous @@ -295,7 +292,7 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd); * * This function is just like v4l2_async_register_subdev() with the exception * that calling it will also parse firmware interfaces for remote references - * using v4l2_async_notifier_parse_fwnode_sensor() and registers the + * using v4l2_async_nf_parse_fwnode_sensor() and registers the * async sub-devices. The sub-device is similarly unregistered by calling * v4l2_async_unregister_subdev(). * diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index 6a4afd4a7df2..5cf1edefb822 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -260,8 +260,7 @@ struct v4l2_file_operations { * Only set @dev_parent if that can't be deduced from @v4l2_dev. */ -struct video_device -{ +struct video_device { #if defined(CONFIG_MEDIA_CONTROLLER) struct media_entity entity; struct media_intf_devnode *intf_devnode; diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index 7ab033b819eb..9c97f1dbd1c6 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -463,7 +463,7 @@ typedef int (*parse_endpoint_func)(struct device *dev, struct v4l2_async_subdev *asd); /** - * v4l2_async_notifier_parse_fwnode_endpoints - Parse V4L2 fwnode endpoints in a + * v4l2_async_nf_parse_fwnode_endpoints - Parse V4L2 fwnode endpoints in a * device node * @dev: the device the endpoints of which are to be parsed * @notifier: notifier for @dev @@ -496,7 +496,7 @@ typedef int (*parse_endpoint_func)(struct device *dev, * to retain that configuration, the user needs to allocate memory for it. * * Any notifier populated using this function must be released with a call to - * v4l2_async_notifier_cleanup() after it has been unregistered and the async + * v4l2_async_nf_cleanup() after it has been unregistered and the async * sub-devices are no longer in use, even if the function returned an error. * * Return: %0 on success, including when no async sub-devices are found @@ -505,10 +505,10 @@ typedef int (*parse_endpoint_func)(struct device *dev, * Other error codes as returned by @parse_endpoint */ int -v4l2_async_notifier_parse_fwnode_endpoints(struct device *dev, - struct v4l2_async_notifier *notifier, - size_t asd_struct_size, - parse_endpoint_func parse_endpoint); +v4l2_async_nf_parse_fwnode_endpoints(struct device *dev, + struct v4l2_async_notifier *notifier, + size_t asd_struct_size, + parse_endpoint_func parse_endpoint); /* Helper macros to access the connector links. */ diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 12955cb460d2..2467284e5f26 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -46,6 +46,7 @@ enum vb2_memory { struct vb2_fileio_data; struct vb2_threadio_data; +struct vb2_buffer; /** * struct vb2_mem_ops - memory handling/memory allocator operations. @@ -53,10 +54,8 @@ struct vb2_threadio_data; * return ERR_PTR() on failure or a pointer to allocator private, * per-buffer data on success; the returned private structure * will then be passed as @buf_priv argument to other ops in this - * structure. Additional gfp_flags to use when allocating the - * are also passed to this operation. These flags are from the - * gfp_flags field of vb2_queue. The size argument to this function - * shall be *page aligned*. + * structure. The size argument to this function shall be + * *page aligned*. * @put: inform the allocator that the buffer will no longer be used; * usually will result in the allocator freeing the buffer (if * no other users of this buffer are present); the @buf_priv @@ -117,31 +116,33 @@ struct vb2_threadio_data; * map_dmabuf, unmap_dmabuf. */ struct vb2_mem_ops { - void *(*alloc)(struct device *dev, unsigned long attrs, - unsigned long size, - enum dma_data_direction dma_dir, - gfp_t gfp_flags); + void *(*alloc)(struct vb2_buffer *vb, + struct device *dev, + unsigned long size); void (*put)(void *buf_priv); - struct dma_buf *(*get_dmabuf)(void *buf_priv, unsigned long flags); - - void *(*get_userptr)(struct device *dev, unsigned long vaddr, - unsigned long size, - enum dma_data_direction dma_dir); + struct dma_buf *(*get_dmabuf)(struct vb2_buffer *vb, + void *buf_priv, + unsigned long flags); + + void *(*get_userptr)(struct vb2_buffer *vb, + struct device *dev, + unsigned long vaddr, + unsigned long size); void (*put_userptr)(void *buf_priv); void (*prepare)(void *buf_priv); void (*finish)(void *buf_priv); - void *(*attach_dmabuf)(struct device *dev, + void *(*attach_dmabuf)(struct vb2_buffer *vb, + struct device *dev, struct dma_buf *dbuf, - unsigned long size, - enum dma_data_direction dma_dir); + unsigned long size); void (*detach_dmabuf)(void *buf_priv); int (*map_dmabuf)(void *buf_priv); void (*unmap_dmabuf)(void *buf_priv); - void *(*vaddr)(void *buf_priv); - void *(*cookie)(void *buf_priv); + void *(*vaddr)(struct vb2_buffer *vb, void *buf_priv); + void *(*cookie)(struct vb2_buffer *vb, void *buf_priv); unsigned int (*num_users)(void *buf_priv); @@ -266,10 +267,10 @@ struct vb2_buffer { * after the 'buf_finish' op is called. * copied_timestamp: the timestamp of this capture buffer was copied * from an output buffer. - * need_cache_sync_on_prepare: when set buffer's ->prepare() function - * performs cache sync/invalidation. - * need_cache_sync_on_finish: when set buffer's ->finish() function - * performs cache sync/invalidation. + * skip_cache_sync_on_prepare: when set buffer's ->prepare() function + * skips cache sync/invalidation. + * skip_cache_sync_on_finish: when set buffer's ->finish() function + * skips cache sync/invalidation. * queued_entry: entry on the queued buffers list, which holds * all buffers queued from userspace * done_entry: entry on the list that stores all buffers ready @@ -280,8 +281,8 @@ struct vb2_buffer { unsigned int synced:1; unsigned int prepared:1; unsigned int copied_timestamp:1; - unsigned int need_cache_sync_on_prepare:1; - unsigned int need_cache_sync_on_finish:1; + unsigned int skip_cache_sync_on_prepare:1; + unsigned int skip_cache_sync_on_finish:1; struct vb2_plane planes[VB2_MAX_PLANES]; struct list_head queued_entry; @@ -503,6 +504,8 @@ struct vb2_buf_ops { * @allow_cache_hints: when set user-space can pass cache management hints in * order to skip cache flush/invalidation on ->prepare() or/and * ->finish(). + * @non_coherent_mem: when set queue will attempt to allocate buffers using + * non-coherent memory. * @lock: pointer to a mutex that protects the &struct vb2_queue. The * driver can set this to a mutex to let the v4l2 core serialize * the queuing ioctls. If the driver wants to handle locking @@ -582,6 +585,7 @@ struct vb2_queue { unsigned int uses_qbuf:1; unsigned int uses_requests:1; unsigned int allow_cache_hints:1; + unsigned int non_coherent_mem:1; struct mutex *lock; void *owner; @@ -747,6 +751,8 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * vb2_core_reqbufs() - Initiate streaming. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. + * @flags: auxiliary queue/buffer management flags. Currently, the only + * used flag is %V4L2_MEMORY_FLAG_NON_COHERENT. * @count: requested buffer count. * * Videobuf2 core helper to implement VIDIOC_REQBUF() operation. It is called @@ -771,12 +777,13 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * Return: returns zero on success; an error code otherwise. */ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int *count); + unsigned int flags, unsigned int *count); /** * vb2_core_create_bufs() - Allocate buffers and any required auxiliary structs * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. + * @flags: auxiliary queue/buffer management flags. * @count: requested buffer count. * @requested_planes: number of planes requested. * @requested_sizes: array with the size of the planes. @@ -794,7 +801,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, * Return: returns zero on success; an error code otherwise. */ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int *count, + unsigned int flags, unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[]); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62dd8422e0dc..27336fc70467 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames - * @mgmt_registrations_lock: lock for the list * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers @@ -5423,7 +5422,6 @@ struct wireless_dev { u32 identifier; struct list_head mgmt_registrations; - spinlock_t mgmt_registrations_lock; u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/include/net/dsa.h b/include/net/dsa.h index f9a17145255a..d784e76113b8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -447,6 +447,11 @@ static inline bool dsa_port_is_user(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_USER; } +static inline bool dsa_port_is_unused(struct dsa_port *dp) +{ + return dp->type == DSA_PORT_TYPE_UNUSED; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; @@ -580,8 +585,16 @@ struct dsa_switch_ops { int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); + + /* Per-port initialization and destruction methods. Mandatory if the + * driver registers devlink port regions, optional otherwise. + */ + int (*port_setup)(struct dsa_switch *ds, int port); + void (*port_teardown)(struct dsa_switch *ds, int port); + u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* @@ -1041,6 +1054,7 @@ static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); +void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 21c5386d4a6d..ab5348e57db1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight, u8 rt_family); + int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af0fc13cea34..618d1f427cb2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2818,13 +2818,13 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag * when they are able to replace in-use PTK keys according to the following * requirements: - * 1) They do not hand over frames decrypted with the old key to - mac80211 once the call to set_key() with command %DISABLE_KEY has been - completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key, + * 1) They do not hand over frames decrypted with the old key to mac80211 + once the call to set_key() with command %DISABLE_KEY has been completed, 2) either drop or continue to use the old key for any outgoing frames queued at the time of the key deletion (including re-transmits), 3) never send out a frame queued prior to the set_key() %SET_KEY command - encrypted with the new key and + encrypted with the new key when also needing + @IEEE80211_KEY_FLAG_GENERATE_IV and 4) never send out a frame unencrypted when it should be encrypted. Mac80211 will not queue any new frames for a deleted key to the driver. */ diff --git a/include/net/mctp.h b/include/net/mctp.h index a824d47c3c6d..ffd2c23bd76d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -54,7 +54,7 @@ struct mctp_sock { struct sock sk; /* bind() params */ - int bind_net; + unsigned int bind_net; mctp_eid_t bind_addr; __u8 bind_type; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 6026bbefbffd..3214848402ec 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -69,6 +69,10 @@ struct mptcp_out_options { struct { u64 sndr_key; u64 rcvr_key; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + __sum16 csum; }; struct { struct mptcp_addr_info addr; diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 0fd8a4159662..ceadf8ba25a4 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -17,7 +17,6 @@ struct inet_frags_ctl; struct nft_ct_frag6_pernet { struct ctl_table_header *nf_frag_frags_hdr; struct fqdir *fqdir; - unsigned int users; }; #endif /* _NF_DEFRAG_IPV6_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 148f5d8ee5ab..a16171c5fd9e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1202,7 +1202,7 @@ struct nft_object *nft_obj_lookup(const struct net *net, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); + int event, u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 986a2a9cfdfa..b593f95e9991 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -27,5 +27,11 @@ struct netns_nf { #if IS_ENABLED(CONFIG_DECNET) struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + unsigned int defrag_ipv4_users; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + unsigned int defrag_ipv6_users; +#endif }; #endif diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 10e1777877e6..28085b995ddc 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 6d7b12cba015..bf79f3a890af 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -11,6 +11,7 @@ #include <uapi/linux/pkt_sched.h> #define DEFAULT_TX_QUEUE_LEN 1000 +#define STAB_SIZE_LOG_MAX 30 struct qdisc_walker { int stop; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2eb6d7c2c931..f37c7a558d6d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -384,11 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk, * Verification Tag value does not match the receiver's own * tag value, the receiver shall silently discard the packet... */ - if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag) - return 1; + if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag) + return 0; chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; - return 0; + return 1; } /* Check VTAG of the packet matches the sender's own tag and the T bit is diff --git a/include/net/sock.h b/include/net/sock.h index 66a9a90f9558..463f390d90b3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -307,6 +307,7 @@ struct bpf_local_storage; * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family + * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred * @sk_peer_pid: &struct pid for this socket's peer * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting @@ -488,8 +489,10 @@ struct sock { u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; #endif + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 @@ -1205,7 +1208,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk, int wake); - bool (*stream_memory_read)(const struct sock *sk); + bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); @@ -1623,7 +1626,36 @@ void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); +bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); + +/** + * lock_sock_fast - fast version of lock_sock + * @sk: socket + * + * This version should be used for very small section, where process wont block + * return false if fast path is taken: + * + * sk_lock.slock locked, owned = 0, BH disabled + * + * return true if slow path is taken: + * + * sk_lock.slock unlocked, owned = 1, BH enabled + */ +static inline bool lock_sock_fast(struct sock *sk) +{ + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} + +/* fast socket lock variant for caller already holding a [different] socket lock */ +static inline bool lock_sock_fast_nested(struct sock *sk) +{ + mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} /** * unlock_sock_fast - complement of lock_sock_fast @@ -1640,6 +1672,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) release_sock(sk); __release(&sk->sk_lock.slock); } else { + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); spin_unlock_bh(&sk->sk_lock.slock); } } @@ -2787,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +static inline bool sk_is_readable(struct sock *sk) +{ + if (sk->sk_prot->sock_is_readable) + return sk->sk_prot->sock_is_readable(sk); + return false; +} #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3166dc15d7d6..60c384569e9c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1576,6 +1576,7 @@ struct tcp_md5sig_key { u8 keylen; u8 family; /* AF_INET or AF_INET6 */ u8 prefixlen; + u8 flags; union tcp_md5_addr addr; int l3index; /* set if key added with L3 scope */ u8 key[TCP_MD5SIG_MAXKEYLEN]; @@ -1621,10 +1622,10 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index, + int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen, gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index); + int family, u8 prefixlen, int l3index, u8 flags); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); diff --git a/include/net/tls.h b/include/net/tls.h index be4b3e1cac46..1fffb206f09f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -375,7 +376,7 @@ void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -bool tls_sw_stream_read(const struct sock *sk); +bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION && prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) diff --git a/include/net/udp.h b/include/net/udp.h index 360df454356c..909ecf447e0f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 4726c1bbec65..64154c1fed02 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -323,8 +323,10 @@ struct ssp_response_iu { __be32 sense_data_len; __be32 response_data_len; - u8 resp_data[0]; - u8 sense_data[]; + union { + DECLARE_FLEX_ARRAY(u8, resp_data); + DECLARE_FLEX_ARRAY(u8, sense_data); + }; } __attribute__ ((packed)); struct ssp_command_iu { @@ -554,8 +556,10 @@ struct ssp_response_iu { __be32 sense_data_len; __be32 response_data_len; - u8 resp_data[0]; - u8 sense_data[]; + union { + DECLARE_FLEX_ARRAY(u8, resp_data); + DECLARE_FLEX_ARRAY(u8, sense_data); + }; } __attribute__ ((packed)); struct ssp_command_iu { diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index eaf04c9a1dfc..31078063afac 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -396,4 +396,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq); +struct request *scsi_alloc_request(struct request_queue *q, + unsigned int op, blk_mq_req_flags_t flags); + #endif /* _SCSI_SCSI_CMND_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 09a17f6e93a7..430b73bd02ac 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -5,7 +5,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/workqueue.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <scsi/scsi.h> #include <linux/atomic.h> #include <linux/sbitmap.h> @@ -146,7 +146,6 @@ struct scsi_device { struct scsi_vpd __rcu *vpd_pg83; struct scsi_vpd __rcu *vpd_pg80; struct scsi_vpd __rcu *vpd_pg89; - unsigned char current_tag; /* current tag */ struct scsi_target *sdev_target; blist_flags_t sdev_bflags; /* black/white flags as also found in diff --git a/include/soc/arc/timers.h b/include/soc/arc/timers.h index 7ecde3b159c8..ae99d3e855f1 100644 --- a/include/soc/arc/timers.h +++ b/include/soc/arc/timers.h @@ -17,8 +17,8 @@ #define ARC_REG_TIMER1_CNT 0x100 /* timer 1 count */ /* CTRL reg bits */ -#define TIMER_CTRL_IE (1 << 0) /* Interrupt when Count reaches limit */ -#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ +#define ARC_TIMER_CTRL_IE (1 << 0) /* Interrupt when Count reaches limit */ +#define ARC_TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ #define ARC_TIMERN_MAX 0xFFFFFFFF diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 06706a9fd5b1..d7055b41982d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -89,15 +89,6 @@ /* Source PGIDs, one per physical port */ #define PGID_SRC 80 -#define IFH_TAG_TYPE_C 0 -#define IFH_TAG_TYPE_S 1 - -#define IFH_REW_OP_NOOP 0x0 -#define IFH_REW_OP_DSCP 0x1 -#define IFH_REW_OP_ONE_STEP_PTP 0x2 -#define IFH_REW_OP_TWO_STEP_PTP 0x3 -#define IFH_REW_OP_ORIGIN_PTP 0x5 - #define OCELOT_NUM_TC 8 #define OCELOT_SPEED_2500 0 @@ -603,10 +594,10 @@ struct ocelot_port { /* The VLAN ID that will be transmitted as untagged, on egress */ struct ocelot_vlan native_vlan; + unsigned int ptp_skbs_in_flight; u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; - spinlock_t ts_id_lock; phy_interface_t phy_mode; @@ -680,6 +671,9 @@ struct ocelot { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_info; struct hwtstamp_config hwtstamp_config; + unsigned int ptp_skbs_in_flight; + /* Protects the 2-step TX timestamp ID logic */ + spinlock_t ts_id_lock; /* Protects the PTP interface state */ struct mutex ptp_lock; /* Protects the PTP clock */ @@ -692,15 +686,6 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -struct ocelot_skb_cb { - struct sk_buff *clone; - u8 ptp_cmd; - u8 ts_id; -}; - -#define OCELOT_SKB_CB(skb) \ - ((struct ocelot_skb_cb *)((skb)->cb)) - #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi)) #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri)) @@ -752,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); -#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB) - /* Packet I/O */ bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, @@ -761,36 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -u32 ocelot_ptp_rew_op(struct sk_buff *skb); -#else - -static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp) -{ - return false; -} - -static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port, - int grp, u32 rew_op, - struct sk_buff *skb) -{ -} - -static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, - struct sk_buff **skb) -{ - return -EIO; -} - -static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) -{ -} - -static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) -{ - return 0; -} -#endif - /* Hardware initialization */ int ocelot_regfields_init(struct ocelot *ocelot, const struct reg_field *const regfields); diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h index ded497d72bdb..f085884b1fa2 100644 --- a/include/soc/mscc/ocelot_ptp.h +++ b/include/soc/mscc/ocelot_ptp.h @@ -13,6 +13,9 @@ #include <linux/ptp_clock_kernel.h> #include <soc/mscc/ocelot.h> +#define OCELOT_MAX_PTP_ID 63 +#define OCELOT_PTP_FIFO_SIZE 128 + #define PTP_PIN_CFG_RSZ 0x20 #define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ #define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 25fd525aaf92..4869ebbd438d 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -694,7 +694,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, - bool tc_offload); +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, + unsigned long cookie, bool tc_offload); #endif /* _OCELOT_VCAP_H_ */ diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 01570dbda503..0e45963bb767 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -224,6 +224,7 @@ struct hda_codec { #endif /* misc flags */ + unsigned int configured:1; /* codec was configured */ unsigned int in_freeing:1; /* being released */ unsigned int registered:1; /* codec was registered */ unsigned int display_power_control:1; /* needs display power */ diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 989e1517332d..7a08ed2acd60 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -98,6 +98,7 @@ struct snd_rawmidi_file { struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *input; struct snd_rawmidi_substream *output; + unsigned int user_pversion; /* supported protocol version */ }; struct snd_rawmidi_str { diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9f73ed2cf061..bca73e8c8cde 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -306,11 +306,13 @@ enum afs_flock_operation { enum afs_cb_break_reason { afs_cb_break_no_break, + afs_cb_break_no_promise, afs_cb_break_for_callback, afs_cb_break_for_deleted, afs_cb_break_for_lapsed, + afs_cb_break_for_s_reinit, afs_cb_break_for_unlink, - afs_cb_break_for_vsbreak, + afs_cb_break_for_v_break, afs_cb_break_for_volume_callback, afs_cb_break_for_zap, }; @@ -602,11 +604,13 @@ enum afs_cb_break_reason { #define afs_cb_break_reasons \ EM(afs_cb_break_no_break, "no-break") \ + EM(afs_cb_break_no_promise, "no-promise") \ EM(afs_cb_break_for_callback, "break-cb") \ EM(afs_cb_break_for_deleted, "break-del") \ EM(afs_cb_break_for_lapsed, "break-lapsed") \ + EM(afs_cb_break_for_s_reinit, "s-reinit") \ EM(afs_cb_break_for_unlink, "break-unlink") \ - EM(afs_cb_break_for_vsbreak, "break-vs") \ + EM(afs_cb_break_for_v_break, "break-v") \ EM(afs_cb_break_for_volume_callback, "break-v-cb") \ E_(afs_cb_break_for_zap, "break-zap") diff --git a/include/trace/events/block.h b/include/trace/events/block.h index cc5ab96a7471..a95daa4d4caa 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -114,7 +114,7 @@ TRACE_EVENT(block_rq_requeue, */ TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), + TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), @@ -122,7 +122,7 @@ TRACE_EVENT(block_rq_complete, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __field( int, error ) + __field( int , error ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), @@ -131,7 +131,7 @@ TRACE_EVENT(block_rq_complete, __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; - __entry->error = error; + __entry->error = blk_status_to_errno(error); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 9a448fe9355d..920b6a303d60 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -178,7 +178,7 @@ TRACE_EVENT(cachefiles_unlink, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->why = why; ), @@ -205,7 +205,7 @@ TRACE_EVENT(cachefiles_rename, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->to = to; __entry->why = why; @@ -305,7 +305,7 @@ TRACE_EVENT(cachefiles_mark_buried, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->why = why; ), diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index bf9806fd1306..16ae7b666810 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -24,7 +24,7 @@ struct erofs_map_blocks; #define show_mflags(flags) __print_flags(flags, "", \ { EROFS_MAP_MAPPED, "M" }, \ { EROFS_MAP_META, "I" }, \ - { EROFS_MAP_ZIPPED, "Z" }) + { EROFS_MAP_ENCODED, "E" }) TRACE_EVENT(erofs_lookup, @@ -35,20 +35,20 @@ TRACE_EVENT(erofs_lookup, TP_STRUCT__entry( __field(dev_t, dev ) __field(erofs_nid_t, nid ) - __field(const char *, name ) + __string(name, dentry->d_name.name ) __field(unsigned int, flags ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->nid = EROFS_I(dir)->nid; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", show_dev_nid(__entry), - __entry->name, + __get_str(name), __entry->flags) ); diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 0dd30de00e5b..7346f0164cf4 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -6,6 +6,7 @@ #define _TRACE_IO_URING_H #include <linux/tracepoint.h> +#include <uapi/linux/io_uring.h> struct io_wq_work; @@ -497,6 +498,66 @@ TRACE_EVENT(io_uring_task_run, (unsigned long long) __entry->user_data) ); +/* + * io_uring_req_failed - called when an sqe is errored dring submission + * + * @sqe: pointer to the io_uring_sqe that failed + * @error: error it failed with + * + * Allows easier diagnosing of malformed requests in production systems. + */ +TRACE_EVENT(io_uring_req_failed, + + TP_PROTO(const struct io_uring_sqe *sqe, int error), + + TP_ARGS(sqe, error), + + TP_STRUCT__entry ( + __field( u8, opcode ) + __field( u8, flags ) + __field( u8, ioprio ) + __field( u64, off ) + __field( u64, addr ) + __field( u32, len ) + __field( u32, op_flags ) + __field( u64, user_data ) + __field( u16, buf_index ) + __field( u16, personality ) + __field( u32, file_index ) + __field( u64, pad1 ) + __field( u64, pad2 ) + __field( int, error ) + ), + + TP_fast_assign( + __entry->opcode = sqe->opcode; + __entry->flags = sqe->flags; + __entry->ioprio = sqe->ioprio; + __entry->off = sqe->off; + __entry->addr = sqe->addr; + __entry->len = sqe->len; + __entry->op_flags = sqe->rw_flags; + __entry->user_data = sqe->user_data; + __entry->buf_index = sqe->buf_index; + __entry->personality = sqe->personality; + __entry->file_index = sqe->file_index; + __entry->pad1 = sqe->__pad2[0]; + __entry->pad2 = sqe->__pad2[1]; + __entry->error = error; + ), + + TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, " + "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, " + "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", + __entry->opcode, __entry->flags, __entry->ioprio, + (unsigned long long)__entry->off, + (unsigned long long) __entry->addr, __entry->len, + __entry->op_flags, (unsigned long long) __entry->user_data, + __entry->buf_index, __entry->personality, __entry->file_index, + (unsigned long long) __entry->pad1, + (unsigned long long) __entry->pad2, __entry->error) +); + #endif /* _TRACE_IO_URING_H */ /* This part must be outside protection */ diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index 491098a0d8ed..bf7533f171ff 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -13,11 +13,11 @@ TRACE_EVENT(kyber_latency, - TP_PROTO(struct request_queue *q, const char *domain, const char *type, + TP_PROTO(dev_t dev, const char *domain, const char *type, unsigned int percentile, unsigned int numerator, unsigned int denominator, unsigned int samples), - TP_ARGS(q, domain, type, percentile, numerator, denominator, samples), + TP_ARGS(dev, domain, type, percentile, numerator, denominator, samples), TP_STRUCT__entry( __field( dev_t, dev ) @@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; @@ -47,10 +47,9 @@ TRACE_EVENT(kyber_latency, TRACE_EVENT(kyber_adjust, - TP_PROTO(struct request_queue *q, const char *domain, - unsigned int depth), + TP_PROTO(dev_t dev, const char *domain, unsigned int depth), - TP_ARGS(q, domain, depth), + TP_ARGS(dev, domain, depth), TP_STRUCT__entry( __field( dev_t, dev ) @@ -59,7 +58,7 @@ TRACE_EVENT(kyber_adjust, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -71,9 +70,9 @@ TRACE_EVENT(kyber_adjust, TRACE_EVENT(kyber_throttled, - TP_PROTO(struct request_queue *q, const char *domain), + TP_PROTO(dev_t dev, const char *domain), - TP_ARGS(q, domain), + TP_ARGS(dev, domain), TP_STRUCT__entry( __field( dev_t, dev ) @@ -81,7 +80,7 @@ TRACE_EVENT(kyber_throttled, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1d28431e85bd..171524d3526d 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -16,38 +16,38 @@ #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u -#define trace_pagemap_flags(page) ( \ - (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ - (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ - (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ - (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ - (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \ - (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ +#define trace_pagemap_flags(folio) ( \ + (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ + (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \ + (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \ + (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \ + (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \ + (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); - __entry->lru = page_lru(page); - __entry->flags = trace_pagemap_flags(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); + __entry->lru = folio_lru_list(folio); + __entry->flags = trace_pagemap_flags(folio); ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", - __entry->page, + TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", + __entry->folio, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? "M" : " ", @@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); ), - /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn) - + TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn) ); #endif /* _TRACE_PAGEMAP_H */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 840d1ba84cf5..7dccb66474f7 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -52,11 +52,11 @@ WB_WORK_REASON struct wb_writeback_work; -DECLARE_EVENT_CLASS(writeback_page_template, +DECLARE_EVENT_CLASS(writeback_folio_template, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping), + TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) @@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; - __entry->index = page->index; + __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", @@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template, ) ); -DEFINE_EVENT(writeback_page_template, writeback_dirty_page, +DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); -DEFINE_EVENT(writeback_page_template, wait_on_page_writeback, +DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, @@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs, TRACE_EVENT(track_foreign_dirty, - TP_PROTO(struct page *page, struct bdi_writeback *wb), + TP_PROTO(struct folio *folio, struct bdi_writeback *wb), - TP_ARGS(page, wb), + TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) @@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty, ), TP_fast_assign( - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); @@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); - __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup); + __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 9dc0bf0c5a6e..ecd0f5bdfc1d 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -181,6 +181,10 @@ struct f_owner_ex { blocking */ #define LOCK_UN 8 /* remove lock */ +/* + * LOCK_MAND support has been removed from the kernel. We leave the symbols + * here to not break legacy builds, but these should not be used in new code. + */ #define LOCK_MAND 32 /* This is a mandatory flock ... */ #define LOCK_READ 64 /* which allows concurrent read operations */ #define LOCK_WRITE 128 /* which allows concurrent write operations */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1c5fb86d455a..4557a8b6086f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #define __NR_process_mrelease 448 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) +#define __NR_futex_waitv 449 +__SYSCALL(__NR_futex_waitv, sys_futex_waitv) + #undef __NR_syscalls -#define __NR_syscalls 449 +#define __NR_syscalls 450 /* * 32 bit systems traditionally used different diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h index 8c4337548ab5..bb31567e66c0 100644 --- a/include/uapi/drm/mga_drm.h +++ b/include/uapi/drm/mga_drm.h @@ -279,20 +279,22 @@ typedef struct drm_mga_init { unsigned long sarea_priv_offset; - int chipset; - int sgram; + __struct_group(/* no tag */, always32bit, /* no attrs */, + int chipset; + int sgram; - unsigned int maccess; + unsigned int maccess; - unsigned int fb_cpp; - unsigned int front_offset, front_pitch; - unsigned int back_offset, back_pitch; + unsigned int fb_cpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; - unsigned int depth_cpp; - unsigned int depth_offset, depth_pitch; + unsigned int depth_cpp; + unsigned int depth_offset, depth_pitch; - unsigned int texture_offset[MGA_NR_TEX_HEAPS]; - unsigned int texture_size[MGA_NR_TEX_HEAPS]; + unsigned int texture_offset[MGA_NR_TEX_HEAPS]; + unsigned int texture_size[MGA_NR_TEX_HEAPS]; + ); unsigned long fb_offset; unsigned long mmio_offset; diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 20e435fe657a..3246f2c74696 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -225,7 +225,14 @@ struct binder_freeze_info { struct binder_frozen_status_info { __u32 pid; + + /* process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + */ __u32 sync_recv; + + /* process received async transactions since last frozen */ __u32 async_recv; }; diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index afa2472ad5d6..47e2be36d4b1 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,7 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_URINGOP 1336 /* io_uring operation */ #define AUDIT_OPENAT2 1337 /* Record showing openat2 how args */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ @@ -167,8 +168,9 @@ #define AUDIT_FILTER_EXCLUDE 0x05 /* Apply rule before record creation */ #define AUDIT_FILTER_TYPE AUDIT_FILTER_EXCLUDE /* obsolete misleading naming */ #define AUDIT_FILTER_FS 0x06 /* Apply rule at __audit_inode_child */ +#define AUDIT_FILTER_URING_EXIT 0x07 /* Apply rule at io_uring op exit */ -#define AUDIT_NR_FILTERS 7 +#define AUDIT_NR_FILTERS 8 #define AUDIT_FILTER_PREPEND 0x10 /* Prepend to front of list */ diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h deleted file mode 100644 index cf7399f03b71..000000000000 --- a/include/uapi/linux/bcache.h +++ /dev/null @@ -1,445 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_BCACHE_H -#define _LINUX_BCACHE_H - -/* - * Bcache on disk data structures - */ - -#include <linux/types.h> - -#define BITMASK(name, type, field, offset, size) \ -static inline __u64 name(const type *k) \ -{ return (k->field >> offset) & ~(~0ULL << size); } \ - \ -static inline void SET_##name(type *k, __u64 v) \ -{ \ - k->field &= ~(~(~0ULL << size) << offset); \ - k->field |= (v & ~(~0ULL << size)) << offset; \ -} - -/* Btree keys - all units are in sectors */ - -struct bkey { - __u64 high; - __u64 low; - __u64 ptr[]; -}; - -#define KEY_FIELD(name, field, offset, size) \ - BITMASK(name, struct bkey, field, offset, size) - -#define PTR_FIELD(name, offset, size) \ -static inline __u64 name(const struct bkey *k, unsigned int i) \ -{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ - \ -static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ -{ \ - k->ptr[i] &= ~(~(~0ULL << size) << offset); \ - k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ -} - -#define KEY_SIZE_BITS 16 -#define KEY_MAX_U64S 8 - -KEY_FIELD(KEY_PTRS, high, 60, 3) -KEY_FIELD(HEADER_SIZE, high, 58, 2) -KEY_FIELD(KEY_CSUM, high, 56, 2) -KEY_FIELD(KEY_PINNED, high, 55, 1) -KEY_FIELD(KEY_DIRTY, high, 36, 1) - -KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) -KEY_FIELD(KEY_INODE, high, 0, 20) - -/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ - -static inline __u64 KEY_OFFSET(const struct bkey *k) -{ - return k->low; -} - -static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) -{ - k->low = v; -} - -/* - * The high bit being set is a relic from when we used it to do binary - * searches - it told you where a key started. It's not used anymore, - * and can probably be safely dropped. - */ -#define KEY(inode, offset, size) \ -((struct bkey) { \ - .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ - .low = (offset) \ -}) - -#define ZERO_KEY KEY(0, 0, 0) - -#define MAX_KEY_INODE (~(~0 << 20)) -#define MAX_KEY_OFFSET (~0ULL >> 1) -#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) - -#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) -#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) - -#define PTR_DEV_BITS 12 - -PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) -PTR_FIELD(PTR_OFFSET, 8, 43) -PTR_FIELD(PTR_GEN, 0, 8) - -#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) - -#define MAKE_PTR(gen, offset, dev) \ - ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) - -/* Bkey utility code */ - -static inline unsigned long bkey_u64s(const struct bkey *k) -{ - return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); -} - -static inline unsigned long bkey_bytes(const struct bkey *k) -{ - return bkey_u64s(k) * sizeof(__u64); -} - -#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) - -static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) -{ - SET_KEY_INODE(dest, KEY_INODE(src)); - SET_KEY_OFFSET(dest, KEY_OFFSET(src)); -} - -static inline struct bkey *bkey_next(const struct bkey *k) -{ - __u64 *d = (void *) k; - - return (struct bkey *) (d + bkey_u64s(k)); -} - -static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) -{ - __u64 *d = (void *) k; - - return (struct bkey *) (d + nr_keys); -} -/* Enough for a key with 6 pointers */ -#define BKEY_PAD 8 - -#define BKEY_PADDED(key) \ - union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } - -/* Superblock */ - -/* Version 0: Cache device - * Version 1: Backing device - * Version 2: Seed pointer into btree node checksum - * Version 3: Cache device with new UUID format - * Version 4: Backing device with data offset - */ -#define BCACHE_SB_VERSION_CDEV 0 -#define BCACHE_SB_VERSION_BDEV 1 -#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 -#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 -#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 -#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 -#define BCACHE_SB_MAX_VERSION 6 - -#define SB_SECTOR 8 -#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) -#define SB_SIZE 4096 -#define SB_LABEL_SIZE 32 -#define SB_JOURNAL_BUCKETS 256U -/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ -#define MAX_CACHES_PER_SET 8 - -#define BDEV_DATA_START_DEFAULT 16 /* sectors */ - -struct cache_sb_disk { - __le64 csum; - __le64 offset; /* sector where this sb was written */ - __le64 version; - - __u8 magic[16]; - - __u8 uuid[16]; - union { - __u8 set_uuid[16]; - __le64 set_magic; - }; - __u8 label[SB_LABEL_SIZE]; - - __le64 flags; - __le64 seq; - - __le64 feature_compat; - __le64 feature_incompat; - __le64 feature_ro_compat; - - __le64 pad[5]; - - union { - struct { - /* Cache devices */ - __le64 nbuckets; /* device size */ - - __le16 block_size; /* sectors */ - __le16 bucket_size; /* sectors */ - - __le16 nr_in_set; - __le16 nr_this_dev; - }; - struct { - /* Backing devices */ - __le64 data_offset; - - /* - * block_size from the cache device section is still used by - * backing devices, so don't add anything here until we fix - * things to not need it for backing devices anymore - */ - }; - }; - - __le32 last_mount; /* time overflow in y2106 */ - - __le16 first_bucket; - union { - __le16 njournal_buckets; - __le16 keys; - }; - __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 obso_bucket_size_hi; /* obsoleted */ -}; - -/* - * This is for in-memory bcache super block. - * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member - * size, ordering and even whole struct size may be different - * from cache_sb_disk. - */ -struct cache_sb { - __u64 offset; /* sector where this sb was written */ - __u64 version; - - __u8 magic[16]; - - __u8 uuid[16]; - union { - __u8 set_uuid[16]; - __u64 set_magic; - }; - __u8 label[SB_LABEL_SIZE]; - - __u64 flags; - __u64 seq; - - __u64 feature_compat; - __u64 feature_incompat; - __u64 feature_ro_compat; - - union { - struct { - /* Cache devices */ - __u64 nbuckets; /* device size */ - - __u16 block_size; /* sectors */ - __u16 nr_in_set; - __u16 nr_this_dev; - __u32 bucket_size; /* sectors */ - }; - struct { - /* Backing devices */ - __u64 data_offset; - - /* - * block_size from the cache device section is still used by - * backing devices, so don't add anything here until we fix - * things to not need it for backing devices anymore - */ - }; - }; - - __u32 last_mount; /* time overflow in y2106 */ - - __u16 first_bucket; - union { - __u16 njournal_buckets; - __u16 keys; - }; - __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ -}; - -static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) -{ - return sb->version == BCACHE_SB_VERSION_BDEV - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET - || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; -} - -BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); -BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); -BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); -#define CACHE_REPLACEMENT_LRU 0U -#define CACHE_REPLACEMENT_FIFO 1U -#define CACHE_REPLACEMENT_RANDOM 2U - -BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); -#define CACHE_MODE_WRITETHROUGH 0U -#define CACHE_MODE_WRITEBACK 1U -#define CACHE_MODE_WRITEAROUND 2U -#define CACHE_MODE_NONE 3U -BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); -#define BDEV_STATE_NONE 0U -#define BDEV_STATE_CLEAN 1U -#define BDEV_STATE_DIRTY 2U -#define BDEV_STATE_STALE 3U - -/* - * Magic numbers - * - * The various other data structures have their own magic numbers, which are - * xored with the first part of the cache set's UUID - */ - -#define JSET_MAGIC 0x245235c1a3625032ULL -#define PSET_MAGIC 0x6750e15f87337f91ULL -#define BSET_MAGIC 0x90135c78b99e07f5ULL - -static inline __u64 jset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ JSET_MAGIC; -} - -static inline __u64 pset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ PSET_MAGIC; -} - -static inline __u64 bset_magic(struct cache_sb *sb) -{ - return sb->set_magic ^ BSET_MAGIC; -} - -/* - * Journal - * - * On disk format for a journal entry: - * seq is monotonically increasing; every journal entry has its own unique - * sequence number. - * - * last_seq is the oldest journal entry that still has keys the btree hasn't - * flushed to disk yet. - * - * version is for on disk format changes. - */ - -#define BCACHE_JSET_VERSION_UUIDv1 1 -#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ -#define BCACHE_JSET_VERSION 1 - -struct jset { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 keys; - - __u64 last_seq; - - BKEY_PADDED(uuid_bucket); - BKEY_PADDED(btree_root); - __u16 btree_level; - __u16 pad[3]; - - __u64 prio_bucket[MAX_CACHES_PER_SET]; - - union { - struct bkey start[0]; - __u64 d[0]; - }; -}; - -/* Bucket prios/gens */ - -struct prio_set { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 pad; - - __u64 next_bucket; - - struct bucket_disk { - __u16 prio; - __u8 gen; - } __attribute((packed)) data[]; -}; - -/* UUIDS - per backing device/flash only volume metadata */ - -struct uuid_entry { - union { - struct { - __u8 uuid[16]; - __u8 label[32]; - __u32 first_reg; /* time overflow in y2106 */ - __u32 last_reg; - __u32 invalidated; - - __u32 flags; - /* Size of flash only volumes */ - __u64 sectors; - }; - - __u8 pad[128]; - }; -}; - -BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); - -/* Btree nodes */ - -/* Version 1: Seed pointer into btree node checksum - */ -#define BCACHE_BSET_CSUM 1 -#define BCACHE_BSET_VERSION 1 - -/* - * Btree nodes - * - * On disk a btree node is a list/log of these; within each set the keys are - * sorted - */ -struct bset { - __u64 csum; - __u64 magic; - __u64 seq; - __u32 version; - __u32 keys; - - union { - struct bkey start[0]; - __u64 d[0]; - }; -}; - -/* OBSOLETE */ - -/* UUIDS - per backing device/flash only volume metadata */ - -struct uuid_entry_v0 { - __u8 uuid[16]; - __u8 label[32]; - __u32 first_reg; - __u32 last_reg; - __u32 invalidated; - __u32 pad; -}; - -#endif /* _LINUX_BCACHE_H */ diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d7d3cfead056..738619994e26 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args { */ #define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4 +/* + * Read the protocol version in the structure + */ +#define BTRFS_SEND_FLAG_VERSION 0x8 + #define BTRFS_SEND_FLAG_MASK \ (BTRFS_SEND_FLAG_NO_FILE_DATA | \ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ - BTRFS_SEND_FLAG_OMIT_END_CMD) + BTRFS_SEND_FLAG_OMIT_END_CMD | \ + BTRFS_SEND_FLAG_VERSION) struct btrfs_ioctl_send_args { __s64 send_fd; /* in */ @@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args { __u64 __user *clone_sources; /* in */ __u64 parent_root; /* in */ __u64 flags; /* in */ - __u64 reserved[4]; /* in */ + __u32 version; /* in */ + __u8 reserved[28]; /* in */ }; /* diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 6c34f6e2f1f7..804ff8d98f71 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -147,6 +147,8 @@ #define CDROM_NEXT_WRITABLE 0x5394 /* get next writable block */ #define CDROM_LAST_WRITTEN 0x5395 /* get last block written on disc */ +#define CDROM_TIMED_MEDIA_CHANGE 0x5396 /* get the timestamp of the last media change */ + /******************************************************* * CDROM IOCTL structures *******************************************************/ @@ -295,6 +297,23 @@ struct cdrom_generic_command }; }; +/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */ +struct cdrom_timed_media_change_info { + __s64 last_media_change; /* Timestamp of the last detected media + * change in ms. May be set by caller, + * updated upon successful return of + * ioctl. + */ + __u64 media_flags; /* Flags returned by ioctl to indicate + * media status. + */ +}; +#define MEDIA_CHANGED_FLAG 0x1 /* Last detected media change was more + * recent than last_media_change set by + * caller. + */ +/* other bits of media_flags available for future use */ + /* * A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336, * 2340, or 2352 bytes long. diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h index 69829205fdb5..8e87d27b0951 100644 --- a/include/uapi/linux/cifs/cifs_mount.h +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ /* - * include/uapi/linux/cifs/cifs_mount.h * * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) * diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index f880d2831160..e83954c69fff 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -45,13 +45,13 @@ struct dlm_lock_params { void __user *bastaddr; struct dlm_lksb __user *lksb; char lvb[DLM_USER_LVB_LEN]; - char name[0]; + char name[]; }; struct dlm_lspace_params { __u32 flags; __u32 minor; - char name[0]; + char name[]; }; struct dlm_purge_params { diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 235e5b2facaa..71a5df8d2689 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -44,6 +44,31 @@ FUTEX_PRIVATE_FLAG) /* + * Flags to specify the bit length of the futex word for futex2 syscalls. + * Currently, only 32 is supported. + */ +#define FUTEX_32 2 + +/* + * Max numbers of elements in a futex_waitv array + */ +#define FUTEX_WAITV_MAX 128 + +/** + * struct futex_waitv - A waiter for vectorized wait + * @val: Expected value at uaddr + * @uaddr: User address to wait on + * @flags: Flags for this waiter + * @__reserved: Reserved member to preserve data alignment. Should be 0. + */ +struct futex_waitv { + __u64 val; + __u64 uaddr; + __u32 flags; + __u32 __reserved; +}; + +/* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. */ diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 6135d92e0d47..daf82a230c0e 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -26,7 +26,7 @@ #ifndef _UAPI_HYPERV_H #define _UAPI_HYPERV_H -#include <linux/uuid.h> +#include <linux/types.h> /* * Framework version for util services. diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 59ef35154e3d..c45b5e9a9387 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -158,6 +158,7 @@ enum { #define IORING_TIMEOUT_BOOTTIME (1U << 2) #define IORING_TIMEOUT_REALTIME (1U << 3) #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) +#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* @@ -317,13 +318,19 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, - /* set/get max number of workers */ + /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, /* this goes last */ IORING_REGISTER_LAST }; +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + /* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 32d148309b16..966c3070959b 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -81,6 +81,20 @@ struct ipmi_ipmb_addr { }; /* + * Used for messages received directly from an IPMB that have not gone + * through a MC. This is for systems that sit right on an IPMB so + * they can receive commands and respond to them. + */ +#define IPMI_IPMB_DIRECT_ADDR_TYPE 0x81 +struct ipmi_ipmb_direct_addr { + int addr_type; + short channel; + unsigned char slave_addr; + unsigned char rs_lun; + unsigned char rq_lun; +}; + +/* * A LAN Address. This is an address to/from a LAN interface bridged * by the BMC, not an address actually out on the LAN. * @@ -158,7 +172,7 @@ struct kernel_ipmi_msg { * is used for the receive in-kernel interface and in the receive * IOCTL. * - * The "IPMI_RESPONSE_RESPNOSE_TYPE" is a little strange sounding, but + * The "IPMI_RESPONSE_RESPONSE_TYPE" is a little strange sounding, but * it allows you to get the message results when you send a response * message. */ diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..6acd4ccafbf7 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -10,6 +10,7 @@ #define __UAPI_MCTP_H #include <linux/types.h> +#include <linux/socket.h> typedef __u8 mctp_eid_t; @@ -18,11 +19,13 @@ struct mctp_addr { }; struct sockaddr_mctp { - unsigned short int smctp_family; - int smctp_network; + __kernel_sa_family_t smctp_family; + __u16 __smctp_pad0; + unsigned int smctp_network; struct mctp_addr smctp_addr; __u8 smctp_type; __u8 smctp_tag; + __u8 __smctp_pad1; }; #define MCTP_NET_ANY 0x0 diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f92880a15645..bd8860eeb291 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1141,6 +1141,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. + * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1210,14 +1225,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1241,7 +1258,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. + * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ @@ -1307,6 +1330,11 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index ee8220f8dcf5..3021ea25a284 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -4,3 +4,40 @@ #ifndef __always_inline #define __always_inline inline #endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 5532b5f68493..5fea5feb0412 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -211,6 +211,11 @@ enum v4l2_colorfx { * We reserve 128 controls for this driver. */ #define V4L2_CID_USER_CCS_BASE (V4L2_CID_USER_BASE + 0x10f0) +/* + * The base for Allegro driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ALLEGRO_BASE (V4L2_CID_USER_BASE + 0x1170) /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls @@ -1118,6 +1123,7 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_CID_TEST_PATTERN_BLUE (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 6) #define V4L2_CID_TEST_PATTERN_GREENB (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 7) #define V4L2_CID_UNIT_CELL_SIZE (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 8) +#define V4L2_CID_NOTIFY_GAINS (V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 9) /* Image processing controls */ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9260791b8438..f118fe7a9f58 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -601,15 +601,12 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ -#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ /* two non contiguous planes - one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12M v4l2_fourcc('N', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21M v4l2_fourcc('N', 'M', '2', '1') /* 21 Y/CrCb 4:2:0 */ #define V4L2_PIX_FMT_NV16M v4l2_fourcc('N', 'M', '1', '6') /* 16 Y/CbCr 4:2:2 */ #define V4L2_PIX_FMT_NV61M v4l2_fourcc('N', 'M', '6', '1') /* 16 Y/CrCb 4:2:2 */ -#define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 macroblocks */ -#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 macroblocks */ /* three planes - Y Cb, Cr */ #define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y', 'U', 'V', '9') /* 9 YUV 4:1:0 */ @@ -627,6 +624,15 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV444M v4l2_fourcc('Y', 'M', '2', '4') /* 24 YUV444 planar */ #define V4L2_PIX_FMT_YVU444M v4l2_fourcc('Y', 'M', '4', '2') /* 24 YVU444 planar */ +/* Tiled YUV formats */ +#define V4L2_PIX_FMT_NV12_4L4 v4l2_fourcc('V', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 4x4 tiles */ +#define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ +#define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ + +/* Tiled YUV formats, non contiguous planes */ +#define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ +#define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ + /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ #define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ @@ -733,8 +739,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ +#define V4L2_PIX_FMT_MM21 v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */ #define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ -#define V4L2_PIX_FMT_SUNXI_TILED_NV12 v4l2_fourcc('S', 'T', '1', '2') /* Sunxi Tiled NV12 Format */ #define V4L2_PIX_FMT_CNF4 v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */ @@ -953,9 +959,12 @@ struct v4l2_requestbuffers { __u32 type; /* enum v4l2_buf_type */ __u32 memory; /* enum v4l2_memory */ __u32 capabilities; - __u32 reserved[1]; + __u8 flags; + __u8 reserved[3]; }; +#define V4L2_MEMORY_FLAG_NON_COHERENT (1 << 0) + /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */ #define V4L2_BUF_CAP_SUPPORTS_MMAP (1 << 0) #define V4L2_BUF_CAP_SUPPORTS_USERPTR (1 << 1) @@ -2499,6 +2508,9 @@ struct v4l2_dbg_chip_info { * @memory: enum v4l2_memory; buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. + * @flags: additional buffer management attributes (ignored unless the + * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability + * and configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2507,7 +2519,8 @@ struct v4l2_create_buffers { __u32 memory; struct v4l2_format format; __u32 capabilities; - __u32 reserved[7]; + __u32 flags; + __u32 reserved[6]; }; /* @@ -2615,4 +2628,10 @@ struct v4l2_create_buffers { #define BASE_VIDIOC_PRIVATE 192 /* 192-255 are private */ +/* Deprecated definitions kept for backwards compatibility */ +#ifndef __KERNEL__ +#define V4L2_PIX_FMT_HM12 V4L2_PIX_FMT_NV12_16L16 +#define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32 +#endif + #endif /* _UAPI__LINUX_VIDEODEV2_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index b96c1ea7166d..eda0426ec4c2 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,13 +213,13 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING + XFRM_MSG_SETDEFAULT, #define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT XFRM_MSG_GETDEFAULT, #define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT - - XFRM_MSG_MAPPING, -#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -514,9 +514,12 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { -#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) - __u8 dirmask; - __u8 action; +#define XFRM_USERPOLICY_UNSPEC 0 +#define XFRM_USERPOLICY_BLOCK 1 +#define XFRM_USERPOLICY_ACCEPT 2 + __u8 in; + __u8 fwd; + __u8 out; }; #ifndef __KERNEL__ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7cc2a0f3f2f5..d13bb8c1b450 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -917,7 +917,6 @@ struct hl_wait_cs_in { #define HL_WAIT_CS_STATUS_BUSY 1 #define HL_WAIT_CS_STATUS_TIMEDOUT 2 #define HL_WAIT_CS_STATUS_ABORTED 3 -#define HL_WAIT_CS_STATUS_INTERRUPTED 4 #define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 #define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 @@ -1286,7 +1285,8 @@ struct hl_debug_args { * EIO - The CS was aborted (usually because the device was reset) * ENODEV - The device wants to do hard-reset (so user need to close FD) * - * The driver also returns a custom define inside the IOCTL which can be: + * The driver also returns a custom define in case the IOCTL call returned 0. + * The define can be one of the following: * * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) @@ -1294,8 +1294,6 @@ struct hl_debug_args { * (ETIMEDOUT) * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the * device was reset (EIO) - * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR) - * */ #define HL_IOCTL_WAIT_CS \ diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index e283c2220aba..7f44d54bb0ab 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -141,8 +141,8 @@ struct rxe_dma_info { __u32 sge_offset; __u32 reserved; union { - __u8 inline_data[0]; - struct rxe_sge sge[0]; + __DECLARE_FLEX_ARRAY(__u8, inline_data); + __DECLARE_FLEX_ARRAY(struct rxe_sge, sge); }; }; diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index da61398b1f8f..053949287ce8 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -240,8 +240,8 @@ struct snd_soc_tplg_vendor_array { struct snd_soc_tplg_private { __le32 size; /* in bytes of private data */ union { - char data[0]; - struct snd_soc_tplg_vendor_array array[0]; + __DECLARE_FLEX_ARRAY(char, data); + __DECLARE_FLEX_ARRAY(struct snd_soc_tplg_vendor_array, array); }; } __attribute__((packed)); diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 1d84ec9db93b..5859ca0a1439 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -784,6 +784,7 @@ struct snd_rawmidi_status { #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int) #define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) #define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) #define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 39a5580f8feb..a3584a357f35 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -46,30 +46,18 @@ extern unsigned long *xen_contiguous_bitmap; int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); -#else -static inline int xen_create_contiguous_region(phys_addr_t pstart, - unsigned int order, - unsigned int address_bits, - dma_addr_t *dma_handle) -{ - return 0; -} - -static inline void xen_destroy_contiguous_region(phys_addr_t pstart, - unsigned int order) { } #endif #if defined(CONFIG_XEN_PV) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages); + unsigned int domid, bool no_translate); #else static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, unsigned int domid, - bool no_translate, struct page **pages) + bool no_translate) { BUG(); return 0; @@ -146,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, */ BUG_ON(err_ptr == NULL); return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, - false, pages); + false); } /* @@ -158,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * @err_ptr: Returns per-MFN error status. * @prot: page protection mask * @domid: Domain owning the pages - * @pages: Array of pages if this domain has an auto-translated physmap * * @mfn and @err_ptr may point to the same buffer, the MFNs will be * overwritten by the error codes after they are mapped. @@ -169,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *mfn, int nr, int *err_ptr, - pgprot_t prot, unsigned int domid, - struct page **pages) + pgprot_t prot, unsigned int domid) { if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, - true, pages); + true); } /* xen_remap_domain_gfn_range() - map a range of foreign frames @@ -200,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, - pages); + return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); } int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, |