diff options
Diffstat (limited to 'include/linux')
300 files changed, 5607 insertions, 3030 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..d7136d13aa44 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_check_s4_hw_signature(int check); +extern int acpi_check_s4_hw_signature; #endif #ifdef CONFIG_PM_SLEEP @@ -580,6 +580,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; +extern bool osc_sb_cppc_not_supported; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 @@ -691,7 +692,7 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, - struct property_entry *); + const struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) @@ -930,7 +931,7 @@ static inline int acpi_device_modalias(struct device *dev, static inline struct platform_device * acpi_create_platform_device(struct acpi_device *adev, - struct property_entry *properties) + const struct property_entry *properties) { return NULL; } @@ -1023,7 +1024,15 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); - +#ifdef CONFIG_X86 +struct acpi_s2idle_dev_ops { + struct list_head list_node; + void (*prepare)(void); + void (*restore)(void); +}; +int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); +void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); +#endif /* CONFIG_X86 */ #ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h new file mode 100644 index 000000000000..f477f0b452fa --- /dev/null +++ b/include/linux/acpi_agdi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_AGDI_H__ +#define __ACPI_AGDI_H__ + +#include <linux/acpi.h> + +#ifdef CONFIG_ACPI_AGDI +void __init acpi_agdi_init(void); +#else +static inline void acpi_agdi_init(void) {} +#endif +#endif /* __ACPI_AGDI_H__ */ diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6c7f47846971..6562f543c3e0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -117,30 +117,9 @@ void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -static inline int amba_pclk_enable(struct amba_device *dev) -{ - return clk_enable(dev->pclk); -} - -static inline void amba_pclk_disable(struct amba_device *dev) -{ - clk_disable(dev->pclk); -} - -static inline int amba_pclk_prepare(struct amba_device *dev) -{ - return clk_prepare(dev->pclk); -} - -static inline void amba_pclk_unprepare(struct amba_device *dev) -{ - clk_unprepare(dev->pclk); -} - /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index cce6136b300a..58cbe18d825c 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -11,6 +11,10 @@ void topology_normalize_cpu_scale(void); int topology_update_cpu_topology(void); +#ifdef CONFIG_ACPI_CPPC_LIB +void topology_init_cpu_capacity_cppc(void); +#endif + struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 63ccb5252190..220c8c60e021 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -92,6 +92,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 0a241c5c911d..14dc461b0e82 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); +void __init sdei_init(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } +static inline void sdei_init(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/ata.h b/include/linux/ata.h index 199e47e97d64..21292b5bbb55 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -324,12 +324,12 @@ enum { ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e..6db58d180866 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 993c5628a726..e863c88df95f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -207,14 +207,6 @@ struct backing_dev_info { #endif }; -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; - -void clear_bdi_congested(struct backing_dev_info *bdi, int sync); -void set_bdi_congested(struct backing_dev_info *bdi, int sync); - struct wb_lock_cookie { bool locked; unsigned long flags; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 483979c1b9f4..87ce24d238f3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -135,13 +135,6 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) struct backing_dev_info *inode_to_bdi(struct inode *inode); -static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) -{ - return wb->congested & cong_bits; -} - -long congestion_wait(int sync, long timeout); - static inline bool mapping_can_writeback(struct address_space *mapping) { return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; @@ -162,7 +155,6 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct blkcg *blkcg); -int inode_congested(struct inode *inode, int cong_bits); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode @@ -390,50 +382,8 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } -static inline int inode_congested(struct inode *inode, int cong_bits) -{ - return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); -} - #endif /* CONFIG_CGROUP_WRITEBACK */ -static inline int inode_read_congested(struct inode *inode) -{ - return inode_congested(inode, 1 << WB_sync_congested); -} - -static inline int inode_write_congested(struct inode *inode) -{ - return inode_congested(inode, 1 << WB_async_congested); -} - -static inline int inode_rw_congested(struct inode *inode) -{ - return inode_congested(inode, (1 << WB_sync_congested) | - (1 << WB_async_congested)); -} - -static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) -{ - return wb_congested(&bdi->wb, cong_bits); -} - -static inline int bdi_read_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_sync_congested); -} - -static inline int bdi_write_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_async_congested); -} - -static inline int bdi_rw_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, (1 << WB_sync_congested) | - (1 << WB_async_congested)); -} - const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 049cf9421d83..3dc20c4f394c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -8,6 +8,7 @@ #include <uapi/linux/binfmts.h> struct filename; +struct coredump_params; #define CORENAME_MAX_SIZE 128 @@ -77,18 +78,6 @@ struct linux_binprm { #define BINPRM_FLAGS_PRESERVE_ARGV0_BIT 3 #define BINPRM_FLAGS_PRESERVE_ARGV0 (1 << BINPRM_FLAGS_PRESERVE_ARGV0_BIT) -/* Function parameter for binfmt->coredump */ -struct coredump_params { - const kernel_siginfo_t *siginfo; - struct pt_regs *regs; - struct file *file; - unsigned long limit; - unsigned long mm_flags; - loff_t written; - loff_t pos; - loff_t to_skip; -}; - /* * This structure defines the functions that are used to load the binary formats that * linux accepts. @@ -98,8 +87,10 @@ struct linux_binfmt { struct module *module; int (*load_binary)(struct linux_binprm *); int (*load_shlib)(struct file *); +#ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ +#endif } __randomize_layout; extern void __register_binfmt(struct linux_binfmt *fmt, int insert); diff --git a/include/linux/bio.h b/include/linux/bio.h index 117d7f248ac9..278cc81cc1e7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -65,7 +65,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || - bio_op(bio) == REQ_OP_WRITE_SAME || bio_op(bio) == REQ_OP_WRITE_ZEROES; } @@ -186,8 +185,6 @@ static inline unsigned bio_segments(struct bio *bio) case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; - case REQ_OP_WRITE_SAME: - return 1; default: break; } @@ -405,21 +402,25 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); -struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs, - struct bio_set *bs); -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, - struct bio_set *bs); +struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, + unsigned int opf, gfp_t gfp_mask, + struct bio_set *bs); +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); -extern void __bio_clone_fast(struct bio *, struct bio *); -extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); +struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, + gfp_t gfp, struct bio_set *bs); +int bio_init_clone(struct block_device *bdev, struct bio *bio, + struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; -static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) +static inline struct bio *bio_alloc(struct block_device *bdev, + unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); + return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); @@ -454,10 +455,10 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_init(struct bio *bio, struct bio_vec *table, - unsigned short max_vecs); +void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, + unsigned short max_vecs, unsigned int opf); extern void bio_uninit(struct bio *); -extern void bio_reset(struct bio *); +void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -487,8 +488,6 @@ static inline void bio_release_pages(struct bio *bio, bool mark_dirty) __bio_release_pages(bio, mark_dirty); } -extern const char *bio_devname(struct bio *bio, char *buffer); - #define bio_dev(bio) \ disk_devt((bio)->bi_bdev->bd_disk) @@ -515,13 +514,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) bio_associate_blkg(bio); } -static inline void bio_copy_dev(struct bio *dst, struct bio *src) -{ - bio_clear_flag(dst, BIO_REMAPPED); - dst->bi_bdev = src->bi_bdev; - bio_clone_blkg_association(dst, src); -} - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * @@ -790,6 +782,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) bio->bi_opf |= REQ_NOWAIT; } -struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp); +struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, + unsigned int nr_pages, unsigned int opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6093fa6db260..c9be1657f03d 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -19,6 +19,9 @@ * * Example: * + * #include <linux/bitfield.h> + * #include <linux/bits.h> + * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba5..f2ad8ed8f777 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -25,14 +25,8 @@ #include <linux/kthread.h> #include <linux/fs.h> -/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ -#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) - -/* Max limits for throttle policy */ -#define THROTL_IOPS_MAX UINT_MAX #define FC_APPID_LEN 129 - #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { @@ -44,6 +38,7 @@ enum blkg_iostat_type { }; struct blkcg_gq; +struct blkg_policy_data; struct blkcg { struct cgroup_subsys_state css; @@ -76,36 +71,6 @@ struct blkg_iostat_set { struct blkg_iostat last; }; -/* - * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a - * request_queue (q). This is used by blkcg policies which need to track - * information per blkcg - q pair. - * - * There can be multiple active blkcg policies and each blkg:policy pair is - * represented by a blkg_policy_data which is allocated and freed by each - * policy's pd_alloc/free_fn() methods. A policy can allocate private data - * area by allocating larger data structure which embeds blkg_policy_data - * at the beginning. - */ -struct blkg_policy_data { - /* the blkg and policy id this per-policy data belongs to */ - struct blkcg_gq *blkg; - int plid; -}; - -/* - * Policies that need to keep per-blkcg data which is independent from any - * request_queue associated to it should implement cpd_alloc/free_fn() - * methods. A policy can allocate private data area by allocating larger - * data structure which embeds blkcg_policy_data at the beginning. - * cpd_init() is invoked to let each policy handle per-blkcg data. - */ -struct blkcg_policy_data { - /* the blkcg and policy id this per-policy data belongs to */ - struct blkcg *blkcg; - int plid; -}; - /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ @@ -141,93 +106,11 @@ struct blkcg_gq { struct rcu_head rcu_head; }; -typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); -typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); -typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, - struct request_queue *q, struct blkcg *blkcg); -typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); -typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, - struct seq_file *s); - -struct blkcg_policy { - int plid; - /* cgroup files for the policy */ - struct cftype *dfl_cftypes; - struct cftype *legacy_cftypes; - - /* operations */ - blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; - blkcg_pol_init_cpd_fn *cpd_init_fn; - blkcg_pol_free_cpd_fn *cpd_free_fn; - blkcg_pol_bind_cpd_fn *cpd_bind_fn; - - blkcg_pol_alloc_pd_fn *pd_alloc_fn; - blkcg_pol_init_pd_fn *pd_init_fn; - blkcg_pol_online_pd_fn *pd_online_fn; - blkcg_pol_offline_pd_fn *pd_offline_fn; - blkcg_pol_free_pd_fn *pd_free_fn; - blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; - blkcg_pol_stat_pd_fn *pd_stat_fn; -}; - -extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; -extern bool blkcg_debug_stats; - -struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, - struct request_queue *q, bool update_hint); -int blkcg_init_queue(struct request_queue *q); -void blkcg_exit_queue(struct request_queue *q); - -/* Blkio controller policy registration */ -int blkcg_policy_register(struct blkcg_policy *pol); -void blkcg_policy_unregister(struct blkcg_policy *pol); -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol); -void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol); - -const char *blkg_dev_name(struct blkcg_gq *blkg); -void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, - struct blkg_policy_data *, int), - const struct blkcg_policy *pol, int data, - bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); - -struct blkg_conf_ctx { - struct block_device *bdev; - struct blkcg_gq *blkg; - char *body; -}; - -struct block_device *blkcg_conf_open_bdev(char **inputp); -int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - char *input, struct blkg_conf_ctx *ctx); -void blkg_conf_finish(struct blkg_conf_ctx *ctx); -/** - * blkcg_css - find the current css - * - * Find the css associated with either the kthread or the current task. - * This may return a dying css, so it is up to the caller to use tryget logic - * to confirm it is alive and well. - */ -static inline struct cgroup_subsys_state *blkcg_css(void) -{ - struct cgroup_subsys_state *css; - - css = kthread_blkcg(); - if (css) - return css; - return task_css(current, io_cgrp_id); -} +void blkcg_destroy_blkgs(struct blkcg *blkcg); +void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_maybe_throttle_current(void); static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { @@ -235,27 +118,6 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) } /** - * __bio_blkcg - internal, inconsistent version to get blkcg - * - * DO NOT USE. - * This function is inconsistent and consequently is dangerous to use. The - * first part of the function returns a blkcg where a reference is owned by the - * bio. This means it does not need to be rcu protected as it cannot go away - * with the bio owning a reference to it. However, the latter potentially gets - * it from task_css(). This can race against task migration and the cgroup - * dying. It is also semantically different as it must be called rcu protected - * and is susceptible to failure when trying to get a reference to it. - * Therefore, it is not ok to assume that *_get() will always succeed on the - * blkcg returned here. - */ -static inline struct blkcg *__bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_blkg) - return bio->bi_blkg->blkcg; - return css_to_blkcg(blkcg_css()); -} - -/** * bio_blkcg - grab the blkcg associated with a bio * @bio: target bio * @@ -291,22 +153,6 @@ static inline bool blk_cgroup_congested(void) } /** - * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg - * @return: true if this bio needs to be submitted with the root blkg context. - * - * In order to avoid priority inversions we sometimes need to issue a bio as if - * it were attached to the root blkg, and then backcharge to the actual owning - * blkg. The idea is we do bio_blkcg() to look up the actual context for the - * bio and attach the appropriate blkg to the bio. Then we call this helper and - * if it is true run with the root blkg for that queue and then do any - * backcharging to the originating cgroup once the io is complete. - */ -static inline bool bio_issue_as_root_blkg(struct bio *bio) -{ - return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; -} - -/** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest * @@ -318,96 +164,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) } /** - * __blkg_lookup - internal version of blkg_lookup() - * @blkcg: blkcg of interest - * @q: request_queue of interest - * @update_hint: whether to update lookup hint with the result or not - * - * This is internal version and shouldn't be used by policy - * implementations. Looks up blkgs for the @blkcg - @q pair regardless of - * @q's bypass state. If @update_hint is %true, the caller should be - * holding @q->queue_lock and lookup hint is updated on success. - */ -static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q, - bool update_hint) -{ - struct blkcg_gq *blkg; - - if (blkcg == &blkcg_root) - return q->root_blkg; - - blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->q == q) - return blkg; - - return blkg_lookup_slowpath(blkcg, q, update_hint); -} - -/** - * blkg_lookup - lookup blkg for the specified blkcg - q pair - * @blkcg: blkcg of interest - * @q: request_queue of interest - * - * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock. - */ -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return __blkg_lookup(blkcg, q, false); -} - -/** - * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair - * @q: request_queue of interest - * - * Lookup blkg for @q at the root level. See also blkg_lookup(). - */ -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ - return q->root_blkg; -} - -/** - * blkg_to_pdata - get policy private data - * @blkg: blkg of interest - * @pol: policy of interest - * - * Return pointer to private data associated with the @blkg-@pol pair. - */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) -{ - return blkg ? blkg->pd[pol->plid] : NULL; -} - -static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, - struct blkcg_policy *pol) -{ - return blkcg ? blkcg->cpd[pol->plid] : NULL; -} - -/** - * pdata_to_blkg - get blkg associated with policy private data - * @pd: policy private data of interest - * - * @pd is policy private data. Determine the blkg it's associated with. - */ -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) -{ - return pd ? pd->blkg : NULL; -} - -static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) -{ - return cpd ? cpd->blkcg : NULL; -} - -extern void blkcg_destroy_blkgs(struct blkcg *blkcg); - -/** * blkcg_pin_online - pin online state * @blkcg: blkcg of interest * @@ -439,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg) } while (blkcg); } -/** - * blkg_path - format cgroup path of blkg - * @blkg: blkg of interest - * @buf: target buffer - * @buflen: target buffer length - * - * Format the path of the cgroup of @blkg into @buf. - */ -static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) -{ - return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); -} - -/** - * blkg_get - get a blkg reference - * @blkg: blkg to get - * - * The caller should be holding an existing reference. - */ -static inline void blkg_get(struct blkcg_gq *blkg) -{ - percpu_ref_get(&blkg->refcnt); -} - -/** - * blkg_tryget - try and get a blkg reference - * @blkg: blkg to get - * - * This is for use when doing an RCU lookup of the blkg. We may be in the midst - * of freeing this blkg, so we can only use it if the refcnt is not zero. - */ -static inline bool blkg_tryget(struct blkcg_gq *blkg) -{ - return blkg && percpu_ref_tryget(&blkg->refcnt); -} - -/** - * blkg_put - put a blkg reference - * @blkg: blkg to put - */ -static inline void blkg_put(struct blkcg_gq *blkg) -{ - percpu_ref_put(&blkg->refcnt); -} - -/** - * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU - * read locked. If called under either blkcg or queue lock, the iteration - * is guaranteed to include all and only online blkgs. The caller may - * update @pos_css by calling css_rightmost_descendant() to skip subtree. - * @p_blkg is included in the iteration and the first node to be visited. - */ -#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -/** - * blkg_for_each_descendant_post - post-order walk of a blkg's descendants - * @d_blkg: loop cursor pointing to the current descendant - * @pos_css: used for iteration - * @p_blkg: target blkg to walk descendants of - * - * Similar to blkg_for_each_descendant_pre() but performs post-order - * traversal instead. Synchronization rules are the same. @p_blkg is - * included in the iteration and the last node to be visited. - */ -#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ - css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ - if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q, false))) - -bool __blkcg_punt_bio_submit(struct bio *bio); - -static inline bool blkcg_punt_bio_submit(struct bio *bio) -{ - if (bio->bi_opf & REQ_CGROUP_PUNT) - return __blkcg_punt_bio_submit(bio); - else - return false; -} - -static inline void blkcg_bio_issue_init(struct bio *bio) -{ - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -} - -static inline void blkcg_use_delay(struct blkcg_gq *blkg) -{ - if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) - return; - if (atomic_add_return(1, &blkg->use_delay) == 1) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); -} - -static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - if (WARN_ON_ONCE(old < 0)) - return 0; - if (old == 0) - return 0; - - /* - * We do this song and dance because we can race with somebody else - * adding or removing delay. If we just did an atomic_dec we'd end up - * negative and we'd already be in trouble. We need to subtract 1 and - * then check to see if we were the last delay so we can drop the - * congestion count on the cgroup. - */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } - - if (old == 0) - return 0; - if (old == 1) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - return 1; -} - -/** - * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount - * @blkg: target blkg - * @delay: delay duration in nsecs - * - * When enabled with this function, the delay is not decayed and must be - * explicitly cleared with blkcg_clear_delay(). Must not be mixed with - * blkcg_[un]use_delay() and blkcg_add_delay() usages. - */ -static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person setting the congestion count for this blkg. */ - if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) - atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); - - atomic64_set(&blkg->delay_nsec, delay); -} - -/** - * blkcg_clear_delay - Disable allocator delay mechanism - * @blkg: target blkg - * - * Disable use_delay mechanism. See blkcg_set_delay(). - */ -static inline void blkcg_clear_delay(struct blkcg_gq *blkg) -{ - int old = atomic_read(&blkg->use_delay); - - /* We only want 1 person clearing the congestion count for this blkg. */ - if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); -} - -void blk_cgroup_bio_start(struct bio *bio); -void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); -void blkcg_maybe_throttle_current(void); #else /* CONFIG_BLK_CGROUP */ struct blkcg { }; -struct blkg_policy_data { -}; - -struct blkcg_policy_data { -}; - struct blkcg_gq { }; -struct blkcg_policy { -}; - #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } #ifdef CONFIG_BLOCK - static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } - -static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) -{ return NULL; } -static inline int blkcg_init_queue(struct request_queue *q) { return 0; } -static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } -static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } -static inline int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { return 0; } -static inline void blkcg_deactivate_policy(struct request_queue *q, - const struct blkcg_policy *pol) { } - -static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } +#endif /* CONFIG_BLOCK */ -static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } -static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } -static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } -static inline void blkg_get(struct blkcg_gq *blkg) { } -static inline void blkg_put(struct blkcg_gq *blkg) { } - -static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } -static inline void blkcg_bio_issue_init(struct bio *bio) { } -static inline void blk_cgroup_bio_start(struct bio *bio) { } - -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) - -#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_CGROUP_FC_APPID diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d319ffa59354..7aa5c54901a9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -917,8 +917,7 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) } #define queue_for_each_hw_ctx(q, hctx, i) \ - for ((i) = 0; (i) < (q)->nr_hw_queues && \ - ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) + xa_for_each(&(q)->hctx_table, (i), (hctx)) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ @@ -952,8 +951,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); void blk_rq_unprep_clone(struct request *rq); -blk_status_t blk_insert_cloned_request(struct request_queue *q, - struct request *rq); +blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff..0c85f75f2fc5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) +/* + * BLK_STS_OFFLINE is returned from the driver when the target device is offline + * or is being taken offline. This could help differentiate the case where a + * device is intentionally being shut down from a real I/O error. + */ +#define BLK_STS_OFFLINE ((__force blk_status_t)17) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with @@ -317,7 +324,8 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ @@ -354,8 +362,6 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* write the same sector many times */ - REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..fe2b2b2dfc5b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions Copyright (C) 1992 Drew Eckhardt + */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H -#include <linux/sched.h> -#include <linux/genhd.h> +#include <linux/types.h> +#include <linux/blk_types.h> +#include <linux/device.h> #include <linux/list.h> #include <linux/llist.h> #include <linux/minmax.h> @@ -12,11 +16,15 @@ #include <linux/wait.h> #include <linux/bio.h> #include <linux/gfp.h> +#include <linux/kdev_t.h> #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> #include <linux/blkzoned.h> +#include <linux/sched.h> #include <linux/sbitmap.h> #include <linux/srcu.h> +#include <linux/uuid.h> +#include <linux/xarray.h> struct module; struct request_queue; @@ -33,6 +41,10 @@ struct blk_queue_stats; struct blk_stat_callback; struct blk_crypto_profile; +extern const struct device_type disk_type; +extern struct device_type part_type; +extern struct class block_class; + /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -45,6 +57,145 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + +#define PARTITION_META_INFO_VOLNAMELTH 64 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) + +struct partition_meta_info { + char uuid[PARTITION_META_INFO_UUIDLTH]; + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. + * + */ +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; + +enum { + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ +}; + +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, +}; + +struct disk_events; +struct badblocks; + +struct blk_integrity { + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +struct gendisk { + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. + */ + int major; + int first_minor; + int minors; + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ + + struct xarray part_tbl; + struct block_device *part0; + + const struct block_device_operations *fops; + struct request_queue *queue; + void *private_data; + + int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 +#define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 +#define GD_ADDED 4 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ + + struct backing_dev_info *bdi; + struct kobject *slave_dir; +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif + struct timer_rand_state *random; + atomic_t sync_io; /* RAID */ + struct disk_events *ev; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct kobject integrity_kobj; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif + int node_id; + struct badblocks *bb; + struct lockdep_map lockdep_map; + u64 diskseq; +}; + +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return MKDEV(disk->major, disk->first_minor); +} + static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) @@ -97,7 +248,6 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; - unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -204,7 +354,7 @@ struct request_queue { unsigned int queue_depth; /* hw dispatch queues */ - struct blk_mq_hw_ctx **queue_hw_ctx; + struct xarray hctx_table; unsigned int nr_hw_queues; /* @@ -262,6 +412,7 @@ struct request_queue { #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; + struct kobject *crypto_kobject; #endif unsigned int rq_timeout; @@ -596,6 +747,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) +{ + return device_add_disk(NULL, disk, NULL); +} +void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); +void set_disk_ro(struct gendisk *disk, bool read_only); +void disk_uevent(struct gendisk *disk, enum kobject_action action); + +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); +} + +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); + +void add_disk_randomness(struct gendisk *disk) __latent_entropy; +void rand_initialize_disk(struct gendisk *disk); + +static inline sector_t get_start_sect(struct block_device *bdev) +{ + return bdev->bd_start_sect; +} + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; +} + +static inline sector_t get_capacity(struct gendisk *disk) +{ + return bdev_nr_sectors(disk->part0); +} + +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + +int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); +void put_disk(struct gendisk *disk); +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + static struct lock_class_key __key; \ + \ + __blk_alloc_disk(node_id, &__key); \ +}) +void blk_cleanup_disk(struct gendisk *disk); + +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) +void unregister_blkdev(unsigned int major, const char *name); + +bool bdev_check_media_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void set_capacity(struct gendisk *disk, sector_t size); + +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ + +dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk *disk); +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); @@ -651,9 +914,6 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); - if (unlikely(op == REQ_OP_WRITE_SAME)) - return q->limits.max_write_same_sectors; - if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; @@ -696,8 +956,6 @@ extern void blk_queue_max_discard_segments(struct request_queue *, extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); -extern void blk_queue_max_write_same_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); @@ -748,7 +1006,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); -extern void blk_set_queue_dying(struct request_queue *); + +void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK /* @@ -791,14 +1050,11 @@ extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule); - -static inline bool blk_needs_flush_plug(struct task_struct *tsk) +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); +static inline void blk_flush_plug(struct blk_plug *plug, bool async) { - struct blk_plug *plug = tsk->plug; - - return plug && - (plug->mq_list || !list_empty(&plug->cb_list)); + if (plug) + __blk_flush_plug(plug, async); } int blkdev_issue_flush(struct block_device *bdev); @@ -824,11 +1080,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; @@ -842,9 +1093,6 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); -extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, struct page *page); - #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, @@ -1071,16 +1319,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev) return q->limits.discard_alignment; } -static inline unsigned int bdev_write_same(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_same_sectors; - - return 0; -} - static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1200,6 +1438,8 @@ enum blk_unique_id { struct block_device_operations { void (*submit_bio)(struct bio *bio); + int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, + unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); @@ -1210,6 +1450,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); + void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, @@ -1258,6 +1499,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); @@ -1265,7 +1507,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for - * @start: start time returned by bio_start_io_acct() + * @start_time: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { @@ -1310,6 +1552,7 @@ void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1325,7 +1568,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -#endif +static inline void printk_all_partitions(void) +{ +} +#endif /* CONFIG_BLOCK */ + int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index b525d8cdc25b..88a51b242adc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -8,6 +8,7 @@ #include <linux/jump_label.h> #include <linux/percpu.h> #include <linux/rbtree.h> +#include <net/sock.h> #include <uapi/linux/bpf.h> struct sock; @@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); +/* Opportunistic check to see whether we have any BPF program attached*/ +static inline bool cgroup_bpf_sock_enabled(struct sock *sk, + enum cgroup_bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array *array; + + array = rcu_access_pointer(cgrp->bpf.effective[type]); + return array != &bpf_empty_prog_array.hdr; +} + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \ + if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -181,7 +194,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ + if (sk_fullsock(__sk) && \ + cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ } \ @@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, kernel_optval) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ @@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, max_optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ + if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) && \ + cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fa517ae604ad..bdb5298735ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,17 @@ struct bpf_map { struct work_struct work; struct mutex freeze_mutex; atomic64_t writecnt; + /* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + } owner; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -209,11 +220,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map) static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) - *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = - (struct bpf_spin_lock){}; + memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) - *(struct bpf_timer *)(dst + map->timer_off) = - (struct bpf_timer){}; + memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); } /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ @@ -224,7 +233,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) if (unlikely(map_value_has_spin_lock(map))) { s_off = map->spin_lock_off; s_sz = sizeof(struct bpf_spin_lock); - } else if (unlikely(map_value_has_timer(map))) { + } + if (unlikely(map_value_has_timer(map))) { t_off = map->timer_off; t_sz = sizeof(struct bpf_timer); } @@ -321,7 +331,18 @@ enum bpf_type_flag { */ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_ALLOC, + /* MEM is in user address space. */ + MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), + + /* MEM is a percpu memory. MEM_PERCPU tags PTR_TO_BTF_ID. When tagged + * with MEM_PERCPU, PTR_TO_BTF_ID _cannot_ be directly accessed. In + * order to drop this tag, it must be passed into bpf_per_cpu_ptr() + * or bpf_this_cpu_ptr(), which will return the pointer corresponding + * to the specified cpu. + */ + MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_PERCPU, }; /* Max number of base types. */ @@ -503,7 +524,6 @@ enum bpf_reg_type { */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ - PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ __BPF_REG_TYPE_MAX, @@ -577,8 +597,7 @@ struct bpf_verifier_ops { const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); + u32 *next_btf_id, enum bpf_type_flag *flag); }; struct bpf_prog_offload_ops { @@ -833,8 +852,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); -int bpf_jit_charge_modmem(u32 pages); -void bpf_jit_uncharge_modmem(u32 pages); +int bpf_jit_charge_modmem(u32 size); +void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, @@ -939,6 +958,8 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; + bool xdp_has_frags; + bool use_bpf_prog_pack; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -999,16 +1020,6 @@ struct bpf_prog_aux { }; struct bpf_array_aux { - /* 'Ownership' of prog array is claimed by the first program that - * is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have - * the same prog type and JITed flag. - */ - struct { - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; @@ -1183,7 +1194,14 @@ struct bpf_event_entry { struct rcu_head rcu; }; -bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +static inline bool map_type_contains_progs(struct bpf_map *map) +{ + return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_CPUMAP; +} + +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -1225,6 +1243,19 @@ struct bpf_prog_array { struct bpf_prog_array_item items[]; }; +struct bpf_empty_prog_array { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +}; + +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'bpf_empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +extern struct bpf_empty_prog_array bpf_empty_prog_array; + struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); @@ -1251,6 +1282,7 @@ struct bpf_run_ctx {}; struct bpf_cg_run_ctx { struct bpf_run_ctx run_ctx; const struct bpf_prog_array_item *prog_item; + int retval; }; struct bpf_trace_run_ctx { @@ -1283,19 +1315,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, const void *ctx, bpf_prog_run_fn run_prog, - u32 *ret_flags) + int retval, u32 *ret_flags) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; u32 func_ret; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1304,27 +1336,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; func_ret = run_prog(prog, ctx); - ret &= (func_ret & 1); + if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; *(ret_flags) |= (func_ret >> 1); item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } -static __always_inline u32 +static __always_inline int BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) + const void *ctx, bpf_prog_run_fn run_prog, + int retval) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_cg_run_ctx run_ctx; - u32 ret = 1; + run_ctx.retval = retval; migrate_disable(); rcu_read_lock(); array = rcu_dereference(array_rcu); @@ -1332,13 +1366,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); while ((prog = READ_ONCE(item->prog))) { run_ctx.prog_item = item; - ret &= run_prog(prog, ctx); + if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) + run_ctx.retval = -EPERM; item++; } bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); migrate_enable(); - return ret; + return run_ctx.retval; } static __always_inline u32 @@ -1391,19 +1426,21 @@ out: * 0: NET_XMIT_SUCCESS skb should be transmitted * 1: NET_XMIT_DROP skb should be dropped and cn * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -EPERM skb should be dropped + * 3: -err skb should be dropped */ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ({ \ u32 _flags = 0; \ bool _cn; \ u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ _cn = _flags & BPF_RET_SET_CN; \ - if (_ret) \ + if (_ret && !IS_ERR_VALUE((long)_ret)) \ + _ret = -EFAULT; \ + if (!_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ - _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ + _ret = (_cn ? NET_XMIT_DROP : _ret); \ _ret; \ }) @@ -1724,7 +1761,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1754,7 +1790,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id); + u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id); @@ -1793,6 +1829,11 @@ struct bpf_core_ctx { int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, int relo_idx, void *insn); +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1862,11 +1903,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline bool dev_map_can_have_prog(struct bpf_map *map) -{ - return false; -} - static inline void __dev_flush(void) { } @@ -1930,11 +1966,6 @@ static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, return -EOPNOTSUPP; } -static inline bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return false; -} - static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { @@ -1976,12 +2007,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, - struct module *owner) -{ - return false; -} - static inline void bpf_map_put(struct bpf_map *map) { } @@ -2012,6 +2037,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, { return NULL; } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2076,6 +2107,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); + void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else @@ -2129,6 +2163,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } + +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ @@ -2227,6 +2267,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; +extern const struct bpf_func_proto bpf_copy_from_user_task_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2339,6 +2380,8 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void *bpf_arch_text_copy(void *dst, void *src, size_t len); + struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 37b3906af8b1..493e63258497 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem); + bool charge_mem, gfp_t gfp_flags); int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *first_selem); + struct bpf_local_storage_elem *first_selem, + gfp_t gfp_flags); struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags); + void *value, u64 map_flags, gfp_t gfp_flags); void bpf_local_storage_free_rcu(struct rcu_head *rcu); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 48a91c51c015..3e24ad0c4b3c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -140,3 +140,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif +BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e9993172f892..c1fc4af47f69 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -521,6 +521,12 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + enum bpf_arg_type arg_type, + bool is_release_func); +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); @@ -564,4 +570,9 @@ static inline u32 type_flag(u32 type) return type & ~BPF_BASE_TYPE_MASK; } +static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +{ + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6b211323a489..9e97ced2896d 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -10,7 +10,6 @@ #define _BLK_BSG_ #include <linux/blkdev.h> -#include <scsi/scsi_request.h> struct bsg_job; struct request; diff --git a/include/linux/btf.h b/include/linux/btf.h index 0c74348cbc9d..36bc09b8e890 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,11 +12,33 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) +enum btf_kfunc_type { + BTF_KFUNC_TYPE_CHECK, + BTF_KFUNC_TYPE_ACQUIRE, + BTF_KFUNC_TYPE_RELEASE, + BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_MAX, +}; + struct btf; struct btf_member; struct btf_type; union bpf_attr; struct btf_show; +struct btf_id_set; + +struct btf_kfunc_id_set { + struct module *owner; + union { + struct { + struct btf_id_set *check_set; + struct btf_id_set *acquire_set; + struct btf_id_set *release_set; + struct btf_id_set *ret_null_set; + }; + struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; + }; +}; extern const struct file_operations btf_fops; @@ -216,6 +238,11 @@ static inline bool btf_type_is_var(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; } +static inline bool btf_type_is_type_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG; +} + /* union is only a special case of struct: * all its offsetof(member) == 0 */ @@ -300,6 +327,11 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( return (const struct btf_var_secinfo *)(t + 1); } +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL struct bpf_prog; @@ -307,6 +339,11 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id); +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -318,50 +355,18 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -#endif - -struct kfunc_btf_id_set { - struct list_head list; - struct btf_id_set *set; - struct module *owner; -}; - -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s); -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner); - -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; -#else -static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ -} -static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) +static inline bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, + u32 kfunc_btf_id) { + return false; } -static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, - u32 kfunc_id, struct module *owner) +static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *s) { - return false; + return 0; } - -static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; -static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif -#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ - struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ - THIS_MODULE } - #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 919c0fde1c51..bc5d9cc34e4c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -11,6 +11,7 @@ struct btf_id_set { #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ +#include <linux/compiler_attributes.h> /* for __maybe_unused */ /* * Following macros help to define lists of BTF IDs placed @@ -146,14 +147,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 36f33685c8c0..bcb4fe9b8575 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -144,6 +144,7 @@ BUFFER_FNS(Defer_Completion, defer_completion) ((struct buffer_head *)page_private(page)); \ }) #define page_has_buffers(page) PagePrivate(page) +#define folio_buffers(folio) folio_get_private(folio) void buffer_check_dirty_writeback(struct page *page, bool *dirty, bool *writeback); @@ -216,16 +217,14 @@ extern int buffer_heads_over_limit; * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ -void block_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); +void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); -int block_is_partially_uptodate(struct page *page, unsigned long from, - unsigned long count); +bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, @@ -398,7 +397,7 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } -extern int __set_page_dirty_buffers(struct page *page); +bool block_dirty_folio(struct address_space *mapping, struct folio *folio); #else /* CONFIG_BLOCK */ diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h index fef8b607f97e..a6189d21f2ba 100644 --- a/include/linux/cacheflush.h +++ b/include/linux/cacheflush.h @@ -4,6 +4,8 @@ #include <asm/cacheflush.h> +struct folio; + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO void flush_dcache_folio(struct folio *folio); diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index a81652d1c6f3..7ae21c0f7f23 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -113,7 +113,7 @@ struct can_tdc_const { }; #ifdef CONFIG_CAN_CALC_BITTIMING -int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, @@ -121,7 +121,7 @@ void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int -can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, +can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc) { netdev_err(dev, "bit-timing calculation not available\n"); @@ -136,7 +136,7 @@ can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, } #endif /* CONFIG_CAN_CALC_BITTIMING */ -int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, +int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, const u32 *bitrate_const, const unsigned int bitrate_const_cnt); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7ad6c3d0db7d..86bf82dbd8b8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -28,8 +28,8 @@ #define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ -#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -328,6 +328,7 @@ enum { CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_LOOKUPNAME = 0x00105, + CEPH_MDS_OP_GETVXATTR = 0x00106, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6a89ea410e43..00af2c98da75 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -35,6 +35,7 @@ #define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */ +#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) @@ -283,6 +284,7 @@ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_snap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ff99ce094cfa..e7f2fb2fc207 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -383,6 +383,10 @@ struct ceph_connection_v2_info { struct ceph_gcm_nonce in_gcm_nonce; struct ceph_gcm_nonce out_gcm_nonce; + struct page **in_enc_pages; + int in_enc_page_cnt; + int in_enc_resid; + int in_enc_i; struct page **out_enc_pages; int out_enc_page_cnt; int out_enc_resid; @@ -457,6 +461,7 @@ struct ceph_connection { struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ + struct page *bounce_page; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c151413fda..0d1ada8968d7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ + rcu_read_lock_sched_held() || \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) @@ -791,11 +792,9 @@ static inline void cgroup_account_cputime(struct task_struct *task, cpuacct_charge(task, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); - rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, @@ -806,11 +805,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, cpuacct_account_field(task, index, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); - rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ diff --git a/include/linux/cgroup_api.h b/include/linux/cgroup_api.h new file mode 100644 index 000000000000..d0cfe8025111 --- /dev/null +++ b/include/linux/cgroup_api.h @@ -0,0 +1 @@ +#include <linux/cgroup.h> diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index ccb3f034bfa9..3484309b59bf 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -78,6 +78,10 @@ #define AT91_PMC_MAINRDY (1 << 16) /* Main Clock Ready */ #define AT91_CKGR_PLLAR 0x28 /* PLL A Register */ + +#define AT91_PMC_RATIO 0x2c /* Processor clock ratio register [SAMA7G5 only] */ +#define AT91_PMC_RATIO_RATIO (0xf) /* CPU clock ratio. */ + #define AT91_CKGR_PLLBR 0x2c /* PLL B Register */ #define AT91_PMC_DIV (0xff << 0) /* Divider */ #define AT91_PMC_PLLCOUNT (0x3f << 8) /* PLL Counter */ diff --git a/include/linux/cma.h b/include/linux/cma.h index bd801023504b..90fd742fd1ef 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -20,6 +20,14 @@ #define CMA_MAX_NAME 64 +/* + * TODO: once the buddy -- especially pageblock merging and alloc_contig_range() + * -- can deal with only some pageblocks of a higher-order page being + * MIGRATE_CMA, we can use pageblock_nr_pages. + */ +#define CMA_MIN_ALIGNMENT_PAGES MAX_ORDER_NR_PAGES +#define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES) + struct cma; extern unsigned long totalcma_pages; @@ -50,4 +58,6 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); + +extern void cma_reserve_pages_on_error(struct cma *cma); #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 3c4de9b6c6e3..babb1347148c 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -68,3 +68,28 @@ #define __nocfi __attribute__((__no_sanitize__("cfi"))) #define __cficanonical __attribute__((__cfi_canonical_jump_table__)) + +/* + * Turn individual warnings and errors on and off locally, depending + * on version. + */ +#define __diag_clang(version, severity, s) \ + __diag_clang_ ## version(__diag_clang_ ## severity s) + +/* Severity used in pragma directives */ +#define __diag_clang_ignore ignored +#define __diag_clang_warn warning +#define __diag_clang_error error + +#define __diag_str1(s) #s +#define __diag_str(s) __diag_str1(s) +#define __diag(s) _Pragma(__diag_str(clang diagnostic s)) + +#if CONFIG_CLANG_VERSION >= 110000 +#define __diag_clang_11(s) __diag(s) +#else +#define __diag_clang_11(s) +#endif + +#define __diag_ignore_all(option, comment) \ + __diag_clang(11, ignore, option) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index ccbbd31b3aae..52299c957c98 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -97,6 +97,10 @@ #define KASAN_ABI_VERSION 4 #endif +#ifdef CONFIG_SHADOW_CALL_STACK +#define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#endif + #if __has_attribute(__no_sanitize_address__) #define __no_sanitize_address __attribute__((no_sanitize_address)) #else @@ -151,6 +155,9 @@ #define __diag_GCC_8(s) #endif +#define __diag_ignore_all(option, comment) \ + __diag_GCC(8, ignore, option) + /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" * attribute) do not work, and must be disabled. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 429dcebe2b99..0f7fd205ab7e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -117,14 +117,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -133,24 +125,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else -#define annotate_reachable() #define annotate_unreachable() +# define ASM_REACHABLE #define __annotate_jump_table #endif -#ifndef ASM_UNREACHABLE -# define ASM_UNREACHABLE -#endif #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c1795fdb568..1c2c33ae1b37 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,6 +4,14 @@ #ifndef __ASSEMBLY__ +#if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ + __has_attribute(btf_type_tag) +# define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value))) +#else +# define BTF_TYPE_TAG(value) /* nothing */ +#endif + +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) @@ -32,10 +40,10 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # ifdef STRUCTLEAK_PLUGIN # define __user __attribute__((user)) # else -# define __user +# define __user BTF_TYPE_TAG(user) # endif # define __iomem -# define __percpu +# define __percpu BTF_TYPE_TAG(percpu) # define __rcu # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 @@ -137,8 +145,6 @@ struct ftrace_likely_data { */ #define __naked __attribute__((__naked__)) notrace -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - /* * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 @@ -368,4 +374,8 @@ struct ftrace_likely_data { #define __diag_error(compiler, version, option, comment) \ __diag_ ## compiler(version, error, option) +#ifndef __diag_ignore_all +#define __diag_ignore_all(option, comment) +#endif + #endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 248a68c668b4..08a1d3e7e46d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -12,22 +12,34 @@ struct core_vma_metadata { unsigned long start, end; unsigned long flags; unsigned long dump_size; + unsigned long pgoff; + struct file *file; +}; + +struct coredump_params { + const kernel_siginfo_t *siginfo; + struct pt_regs *regs; + struct file *file; + unsigned long limit; + unsigned long mm_flags; + loff_t written; + loff_t pos; + loff_t to_skip; + int vma_count; + size_t vma_data_size; + struct core_vma_metadata *vma_meta; }; /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. */ -struct coredump_params; extern void dump_skip_to(struct coredump_params *cprm, unsigned long to); extern void dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr); extern void do_coredump(const kernel_siginfo_t *siginfo); #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078..35c7d6db4139 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -382,6 +382,9 @@ struct cpufreq_driver { int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); + /* Will be called after the driver is fully initialized */ + void (*ready)(struct cpufreq_policy *policy); + struct freq_attr **attr; /* platform specific boost support code */ @@ -658,6 +661,11 @@ struct gov_attr_set { /* sysfs ops for cpufreq governors */ extern const struct sysfs_ops governor_sysfs_ops; +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..82e33137f917 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,6 +100,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, + CPUHP_RANDOM_PREPARE, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -165,6 +166,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, + CPUHP_AP_PERF_RISCV_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, @@ -231,6 +233,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, @@ -240,6 +243,7 @@ enum cpuhp_state { CPUHP_AP_PERF_CSKY_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, + CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 64dae70d31f5..fe29ac7cc469 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus; extern cpumask_t cpus_booted_once_mask; -static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) +static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); @@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) } /* verify cpu argument to cpumask_* operators */ -static inline unsigned int cpumask_check(unsigned int cpu) +static __always_inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, nr_cpumask_bits); return cpu; @@ -341,12 +341,12 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -357,12 +357,12 @@ static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) +static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -374,7 +374,7 @@ static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * * Returns 1 if @cpu is set in @cpumask, else returns 0 */ -static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -388,7 +388,7 @@ static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) * * test_and_set_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -402,7 +402,7 @@ static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) * * test_and_clear_bit wrapper for cpumasks. */ -static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } diff --git a/include/linux/cpumask_api.h b/include/linux/cpumask_api.h new file mode 100644 index 000000000000..83bd3ebe82b0 --- /dev/null +++ b/include/linux/cpumask_api.h @@ -0,0 +1 @@ +#include <linux/cpumask.h> diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 855869e1fd32..2324ab6f1846 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -133,6 +133,15 @@ #define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000 /* + * Mark an algorithm as a service implementation only usable by a + * template and never by a normal user of the kernel crypto API. + * This is intended to be used by algorithms that are themselves + * not FIPS-approved but may instead be used to implement parts of + * a FIPS-approved algorithm (e.g., dh vs. ffdhe2048(dh)). + */ +#define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 + +/* * Transform masks and values (for crt_flags). */ #define CRYPTO_TFM_NEED_KEY 0x00000001 diff --git a/include/linux/damon.h b/include/linux/damon.h index 5e1e3a128b77..f23cbfa4248d 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -60,19 +60,18 @@ struct damon_region { /** * struct damon_target - Represents a monitoring target. - * @id: Unique identifier for this target. + * @pid: The PID of the virtual address space to monitor. * @nr_regions: Number of monitoring target regions of this target. * @regions_list: Head of the monitoring target regions of this target. * @list: List head for siblings. * * Each monitoring context could have multiple targets. For example, a context * for virtual memory address spaces could have multiple target processes. The - * @id of each target should be unique among the targets of the context. For - * example, in the virtual address monitoring context, it could be a pidfd or - * an address of an mm_struct. + * @pid should be set for appropriate &struct damon_operations including the + * virtual address spaces monitoring operations. */ struct damon_target { - unsigned long id; + struct pid *pid; unsigned int nr_regions; struct list_head regions_list; struct list_head list; @@ -88,6 +87,7 @@ struct damon_target { * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_STAT: Do nothing but count the stat. + * @NR_DAMOS_ACTIONS: Total number of DAMOS actions */ enum damos_action { DAMOS_WILLNEED, @@ -96,6 +96,7 @@ enum damos_action { DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, DAMOS_STAT, /* Do nothing but only record the stat */ + NR_DAMOS_ACTIONS, }; /** @@ -121,9 +122,9 @@ enum damos_action { * uses smaller one as the effective quota. * * For selecting regions within the quota, DAMON prioritizes current scheme's - * target memory regions using the &struct damon_primitive->get_scheme_score. + * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, - * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * &weight_nr_accesses, and &weight_age, because monitoring operations are * encouraged to respect those. */ struct damos_quota { @@ -158,10 +159,12 @@ struct damos_quota { * * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + * @NR_DAMOS_WMARK_METRICS: Total number of DAMOS watermark metrics */ enum damos_wmark_metric { DAMOS_WMARK_NONE, DAMOS_WMARK_FREE_MEM_RATE, + NR_DAMOS_WMARK_METRICS, }; /** @@ -254,13 +257,26 @@ struct damos { struct list_head list; }; +/** + * enum damon_ops_id - Identifier for each monitoring operations implementation + * + * @DAMON_OPS_VADDR: Monitoring operations for virtual address spaces + * @DAMON_OPS_PADDR: Monitoring operations for the physical address space + */ +enum damon_ops_id { + DAMON_OPS_VADDR, + DAMON_OPS_PADDR, + NR_DAMON_OPS, +}; + struct damon_ctx; /** - * struct damon_primitive - Monitoring primitives for given use cases. + * struct damon_operations - Monitoring operations for given use cases. * - * @init: Initialize primitive-internal data structures. - * @update: Update primitive-internal data structures. + * @id: Identifier of this operations set. + * @init: Initialize operations-related data structures. + * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. * @reset_aggregated: Reset aggregated accesses monitoring results. @@ -270,18 +286,20 @@ struct damon_ctx; * @cleanup: Clean up the context. * * DAMON can be extended for various address spaces and usages. For this, - * users should register the low level primitives for their target address - * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread + * users should register the low level operations for their target address + * space and usecase via the &damon_ctx.ops. Then, the monitoring thread * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting - * the monitoring, @update after each &damon_ctx.primitive_update_interval, and + * the monitoring, @update after each &damon_ctx.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each * &damon_ctx.aggr_interval. * - * @init should initialize primitive-internal data structures. For example, + * Each &struct damon_operations instance having valid @id can be registered + * via damon_register_ops() and selected by damon_select_ops() later. + * @init should initialize operations-related data structures. For example, * this could be used to construct proper monitoring target regions and link * those to @damon_ctx.adaptive_targets. - * @update should update the primitive-internal data structures. For example, + * @update should update the operations-related data structures. For example, * this could be used to update monitoring target regions for current status. * @prepare_access_checks should manipulate the monitoring regions to be * prepared for the next access check. @@ -301,7 +319,8 @@ struct damon_ctx; * monitoring. * @cleanup is called from @kdamond just before its termination. */ -struct damon_primitive { +struct damon_operations { + enum damon_ops_id id; void (*init)(struct damon_ctx *context); void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); @@ -355,15 +374,15 @@ struct damon_callback { * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. - * @primitive_update_interval: The time between monitoring primitive updates. + * @ops_update_interval: The time between monitoring operations updates. * * For each @sample_interval, DAMON checks whether each region is accessed or * not. It aggregates and keeps the access information (number of accesses to * each region) for @aggr_interval time. DAMON also checks whether the target * memory regions need update (e.g., by ``mmap()`` calls from the application, * in case of virtual memory monitoring) and applies the changes for each - * @primitive_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_primitive and &struct damon_callback for more + * @ops_update_interval. All time intervals are in micro-seconds. + * Please refer to &struct damon_operations and &struct damon_callback for more * detail. * * @kdamond: Kernel thread who does the monitoring. @@ -375,7 +394,7 @@ struct damon_callback { * * Once started, the monitoring thread runs until explicitly required to be * terminated or every monitoring target is invalid. The validity of the - * targets is checked via the &damon_primitive.target_valid of @primitive. The + * targets is checked via the &damon_operations.target_valid of @ops. The * termination can also be explicitly requested by writing non-zero to * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. * Therefore, users can know whether the monitoring is ongoing or terminated by @@ -385,7 +404,7 @@ struct damon_callback { * Note that the monitoring thread protects only @kdamond and @kdamond_stop via * @kdamond_lock. Accesses to other fields must be protected by themselves. * - * @primitive: Set of monitoring primitives for given use cases. + * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. * * @min_nr_regions: The minimum number of adaptive monitoring regions. @@ -396,17 +415,17 @@ struct damon_callback { struct damon_ctx { unsigned long sample_interval; unsigned long aggr_interval; - unsigned long primitive_update_interval; + unsigned long ops_update_interval; /* private: internal use only */ struct timespec64 last_aggregation; - struct timespec64 last_primitive_update; + struct timespec64 last_ops_update; /* public: */ struct task_struct *kdamond; struct mutex kdamond_lock; - struct damon_primitive primitive; + struct damon_operations ops; struct damon_callback callback; unsigned long min_nr_regions; @@ -475,7 +494,7 @@ struct damos *damon_new_scheme( void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); -struct damon_target *damon_new_target(unsigned long id); +struct damon_target *damon_new_target(void); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); @@ -484,28 +503,18 @@ unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); -int damon_set_targets(struct damon_ctx *ctx, - unsigned long *ids, ssize_t nr_ids); int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, - unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long aggr_int, unsigned long ops_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg); int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); +int damon_register_ops(struct damon_operations *ops); +int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id); -int damon_start(struct damon_ctx **ctxs, int nr_ctxs); +int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ -#ifdef CONFIG_DAMON_VADDR -bool damon_va_target_valid(void *t); -void damon_va_set_primitives(struct damon_ctx *ctx); -#endif /* CONFIG_DAMON_VADDR */ - -#ifdef CONFIG_DAMON_PADDR -bool damon_pa_target_valid(void *t); -void damon_pa_set_primitives(struct damon_ctx *ctx); -#endif /* CONFIG_DAMON_PADDR */ - #endif /* _DAMON_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b26fecf6c8e8..c2a3758c4aaa 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -317,12 +317,6 @@ struct dm_target { unsigned num_secure_erase_bios; /* - * The number of WRITE SAME bios that will be submitted to the target. - * The bio number can be accessed with dm_bio_get_target_bio_nr. - */ - unsigned num_write_same_bios; - - /* * The number of WRITE ZEROES bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ @@ -358,10 +352,16 @@ struct dm_target { bool limit_swap_bios:1; /* - * Set if this target implements a a zoned device and needs emulation of + * Set if this target implements a zoned device and needs emulation of * zone append operations using regular writes. */ bool emulate_zone_append:1; + + /* + * Set if the target will submit IO using dm_submit_bio_remap() + * after returning DM_MAPIO_SUBMITTED from its map function. + */ + bool accounts_remapped_io:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); @@ -465,6 +465,7 @@ int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 278d489e4bdd..19fa0b5ae5ec 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -52,13 +52,13 @@ * * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); * - * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); + * dma_buf_map_set_vaddr(&map, 0xdeadbeaf); * * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). * * .. code-block:: c * - * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); + * dma_buf_map_set_vaddr_iomem(&map, 0xdeadbeaf); * * Instances of struct dma_buf_map do not have to be cleaned up, but * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7ab50076e7a6..2097760e8e95 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -13,7 +13,7 @@ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ -#include <linux/dma-buf-map.h> +#include <linux/iosys-map.h> #include <linux/file.h> #include <linux/err.h> #include <linux/scatterlist.h> @@ -283,8 +283,8 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); - int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); - void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); + int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); + void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; /** @@ -347,7 +347,7 @@ struct dma_buf { * @vmap_ptr: * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. */ - struct dma_buf_map vmap_ptr; + struct iosys_map vmap_ptr; /** * @exp_name: @@ -628,6 +628,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); -int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); -void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); +void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 303dd712220f..fec374f69e12 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -45,19 +45,6 @@ struct dma_fence_array { struct irq_work work; }; -extern const struct dma_fence_ops dma_fence_array_ops; - -/** - * dma_fence_is_array - check if a fence is from the array subsclass - * @fence: fence to test - * - * Return true if it is a dma_fence_array and false otherwise. - */ -static inline bool dma_fence_is_array(struct dma_fence *fence) -{ - return fence->ops == &dma_fence_array_ops; -} - /** * to_dma_fence_array - cast a fence to a dma_fence_array * @fence: fence to cast to a dma_fence_array @@ -68,7 +55,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) static inline struct dma_fence_array * to_dma_fence_array(struct dma_fence *fence) { - if (fence->ops != &dma_fence_array_ops) + if (!fence || !dma_fence_is_array(fence)) return NULL; return container_of(fence, struct dma_fence_array, base); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 54fe3443fd2c..10d51bcdf7b7 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -49,7 +49,6 @@ struct dma_fence_chain { spinlock_t lock; }; -extern const struct dma_fence_ops dma_fence_chain_ops; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain @@ -61,13 +60,28 @@ extern const struct dma_fence_ops dma_fence_chain_ops; static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { - if (!fence || fence->ops != &dma_fence_chain_ops) + if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); } /** + * dma_fence_chain_contained - return the contained fence + * @fence: the fence to test + * + * If the fence is a dma_fence_chain the function returns the fence contained + * inside the chain object, otherwise it returns the fence itself. + */ +static inline struct dma_fence * +dma_fence_chain_contained(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + + return chain ? chain->fence : fence; +} + +/** * dma_fence_chain_alloc * * Returns a new struct dma_fence_chain object or NULL on failure. diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 1ea691753bd3..775cdc0b4f24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,4 +587,42 @@ struct dma_fence *dma_fence_get_stub(void); struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); +extern const struct dma_fence_ops dma_fence_array_ops; +extern const struct dma_fence_ops dma_fence_chain_ops; + +/** + * dma_fence_is_array - check if a fence is from the array subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_array and false otherwise. + */ +static inline bool dma_fence_is_array(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_array_ops; +} + +/** + * dma_fence_is_chain - check if a fence is from the chain subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_chain and false otherwise. + */ +static inline bool dma_fence_is_chain(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +/** + * dma_fence_is_container - check if a fence is a container for other fences + * @fence: the fence to test + * + * Return true if this fence is a container for other fences, false otherwise. + * This is important since we can't build up large fence structure or otherwise + * we run into recursion during operation on those fences. + */ +static inline bool dma_fence_is_container(struct dma_fence *fence) +{ + return dma_fence_is_array(fence) || dma_fence_is_chain(fence); +} + #endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index eebf04325b34..afdfdfac729f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -153,6 +153,13 @@ struct dma_resv { * struct dma_resv_iter - current position into the dma_resv fences * * Don't touch this directly in the driver, use the accessor function instead. + * + * IMPORTANT + * + * When using the lockless iterators like dma_resv_iter_next_unlocked() or + * dma_resv_for_each_fence_unlocked() beware that the iterator can be restarted. + * Code which accumulates statistics or similar needs to check for this with + * dma_resv_iter_is_restarted(). */ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ @@ -243,7 +250,11 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * &dma_resv.lock and using RCU instead. The cursor needs to be initialized * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside * the iterator a reference to the dma_fence is held and the RCU lock dropped. - * When the dma_resv is modified the iteration starts over again. + * + * Beware that the iterator can be restarted when the struct dma_resv for + * @cursor is modified. Code which accumulates statistics or similar needs to + * check for this with dma_resv_iter_is_restarted(). For this reason prefer the + * lock iterator dma_resv_for_each_fence() whenever possible. */ #define dma_resv_for_each_fence_unlocked(cursor, fence) \ for (fence = dma_resv_iter_first_unlocked(cursor); \ @@ -458,8 +469,8 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); -int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, - unsigned *pshared_count, struct dma_fence ***pshared); +int dma_resv_get_fences(struct dma_resv *obj, bool write, + unsigned int *num_fences, struct dma_fence ***fences); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout); diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 939a1beaddf7..3ed117e299ec 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -32,31 +32,29 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); -struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, - u16 tpid, u16 tci); +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); +void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge); -int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci); -void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid); -u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid); -u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); +u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); -u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp); +u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -bool vid_is_dsa_8021q_rxvlan(u16 vid); - -bool vid_is_dsa_8021q_txvlan(u16 vid); - bool vid_is_dsa_8021q(u16 vid); #endif /* _NET_DSA_8021Q_H */ diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h new file mode 100644 index 000000000000..4359fb0221cf --- /dev/null +++ b/include/linux/dsa/tag_qca.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TAG_QCA_H +#define __TAG_QCA_H + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION GENMASK(15, 14) +#define QCA_HDR_RECV_PRIORITY GENMASK(13, 11) +#define QCA_HDR_RECV_TYPE GENMASK(10, 6) +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT GENMASK(2, 0) + +/* Packet type for recv */ +#define QCA_HDR_RECV_TYPE_NORMAL 0x0 +#define QCA_HDR_RECV_TYPE_MIB 0x1 +#define QCA_HDR_RECV_TYPE_RW_REG_ACK 0x2 + +#define QCA_HDR_XMIT_VERSION GENMASK(15, 14) +#define QCA_HDR_XMIT_PRIORITY GENMASK(13, 11) +#define QCA_HDR_XMIT_CONTROL GENMASK(10, 8) +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT GENMASK(6, 0) + +/* Packet type for xmit */ +#define QCA_HDR_XMIT_TYPE_NORMAL 0x0 +#define QCA_HDR_XMIT_TYPE_RW_REG 0x1 + +/* Check code for a valid mgmt packet. Switch will ignore the packet + * with this wrong. + */ +#define QCA_HDR_MGMT_CHECK_CODE_VAL 0x5 + +/* Specific define for in-band MDIO read/write with Ethernet packet */ +#define QCA_HDR_MGMT_SEQ_LEN 4 /* 4 byte for the seq */ +#define QCA_HDR_MGMT_COMMAND_LEN 4 /* 4 byte for the command */ +#define QCA_HDR_MGMT_DATA1_LEN 4 /* First 4 byte for the mdio data */ +#define QCA_HDR_MGMT_HEADER_LEN (QCA_HDR_MGMT_SEQ_LEN + \ + QCA_HDR_MGMT_COMMAND_LEN + \ + QCA_HDR_MGMT_DATA1_LEN) + +#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ + +#define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ + QCA_HDR_LEN + \ + QCA_HDR_MGMT_DATA2_LEN + \ + QCA_HDR_MGMT_PADDING_LEN) + +#define QCA_HDR_MGMT_SEQ_NUM GENMASK(31, 0) /* 63, 32 */ +#define QCA_HDR_MGMT_CHECK_CODE GENMASK(31, 29) /* 31, 29 */ +#define QCA_HDR_MGMT_CMD BIT(28) /* 28 */ +#define QCA_HDR_MGMT_LENGTH GENMASK(23, 20) /* 23, 20 */ +#define QCA_HDR_MGMT_ADDR GENMASK(18, 0) /* 18, 0 */ + +/* Special struct emulating a Ethernet header */ +struct qca_mgmt_ethhdr { + u32 command; /* command bit 31:0 */ + u32 seq; /* seq 63:32 */ + u32 mdio_data; /* first 4byte mdio */ + __be16 hdr; /* qca hdr */ +} __packed; + +enum mdio_cmd { + MDIO_WRITE = 0x0, + MDIO_READ +}; + +struct mib_ethhdr { + u32 data[3]; /* first 3 mib counter */ + __be16 hdr; /* qca hdr */ +} __packed; + +struct qca_tagger_data { + void (*rw_reg_ack_handler)(struct dsa_switch *ds, + struct sk_buff *skb); + void (*mib_autocast_handler)(struct dsa_switch *ds, + struct sk_buff *skb); +}; + +#endif /* __TAG_QCA_H */ diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index d37e5d06a357..a4a13514b730 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,28 +32,25 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; -typedef int (*dtpm_init_t)(void); +struct device_node; -struct dtpm_descr { - dtpm_init_t init; +struct dtpm_subsys_ops { + const char *name; + int (*init)(void); + void (*exit)(void); + int (*setup)(struct dtpm *, struct device_node *); }; -/* Init section thermal table */ -extern struct dtpm_descr __dtpm_table[]; -extern struct dtpm_descr __dtpm_table_end[]; - -#define DTPM_TABLE_ENTRY(name, __init) \ - static struct dtpm_descr __dtpm_table_entry_##name \ - __used __section("__dtpm_table") = { \ - .init = __init, \ - } - -#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) +enum DTPM_NODE_TYPE { + DTPM_NODE_VIRTUAL = 0, + DTPM_NODE_DT, +}; -#define for_each_dtpm_table(__dtpm) \ - for (__dtpm = __dtpm_table; \ - __dtpm < __dtpm_table_end; \ - __dtpm++) +struct dtpm_node { + enum DTPM_NODE_TYPE type; + const char *name; + struct dtpm_node *parent; +}; static inline struct dtpm *to_dtpm(struct powercap_zone *zone) { @@ -70,4 +67,7 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); + +void dtpm_destroy_hierarchy(void); #endif diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2e2b8d6140ed..141952f4fee8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -454,10 +454,21 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); * * Conditional reschedule with additional sanity checks. */ -void irqentry_exit_cond_resched(void); +void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC -DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) +#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_disabled NULL +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() #endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ /** * irqentry_exit - Handle return from exception that used irqentry_enter() diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2ad71cc90b37..92b10e67d5f8 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -134,7 +134,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) #endif } -static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +static inline bool is_multicast_ether_addr_64bits(const u8 *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #ifdef __BIG_ENDIAN @@ -372,8 +372,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. */ -static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], - const u8 addr2[6+2]) +static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a26f37a27167..4af58459a1e7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -70,17 +70,23 @@ enum { /** * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. + * @tcp_data_split: Scatter packet headers and data to separate buffers + * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; + u8 tcp_data_split; + u32 cqe_size; }; /** * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -111,7 +117,7 @@ struct ethtool_link_ext_state_info { enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; enum ethtool_link_ext_substate_cable_issue cable_issue; enum ethtool_link_ext_substate_module module; - u8 __link_ext_substate; + u32 __link_ext_substate; }; }; diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index e525f6957c49..2d04f6448cde 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -64,6 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, struct kmem_cache; +bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); + int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #ifdef CONFIG_FAILSLAB extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); diff --git a/include/linux/fb.h b/include/linux/fb.h index 3da95842b207..9a77ab615c36 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -204,6 +204,7 @@ struct fb_pixmap { struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; + bool sort_pagelist; /* sort pagelist by offset */ struct mutex lock; /* mutex that protects the page list */ struct list_head pagelist; /* list of touched pages */ /* callback */ @@ -262,7 +263,7 @@ struct fb_ops { /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); - /* Copy data from area to another. Obsolete. */ + /* Copy data from area to another */ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); /* Draws a image to the display */ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); @@ -502,6 +503,7 @@ struct fb_info { } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool forced_out; /* set when being removed by another driver */ }; static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 71fa57b88bfc..ed0c0ff42ad5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,7 +548,7 @@ struct sock_fprog_kern { #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { - u32 pages; + u32 size; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; @@ -566,13 +566,15 @@ struct bpf_prog { gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ + blinding_requested:1, /* needs constant blinding */ blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? */ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1; /* Do we call get_func_ip() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -886,17 +888,8 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_ro((unsigned long)hdr, hdr->pages); - set_memory_x((unsigned long)hdr, hdr->pages); -} - -static inline struct bpf_binary_header * -bpf_jit_binary_hdr(const struct bpf_prog *fp) -{ - unsigned long real_start = (unsigned long)fp->bpf_func; - unsigned long addr = real_start & PAGE_MASK; - - return (void *)addr; + set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); @@ -1068,6 +1061,18 @@ void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, + unsigned int alignment, + struct bpf_binary_header **rw_hdr, + u8 **rw_image, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, + struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); +void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, + struct bpf_binary_header *rw_header); + int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke); @@ -1356,7 +1361,10 @@ struct bpf_sockopt_kern { s32 level; s32 optname; s32 optlen; - s32 retval; + /* for retval in struct bpf_cg_run_ctx */ + struct task_struct *current_task; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); diff --git a/include/linux/firmware/imx/svc/rm.h b/include/linux/firmware/imx/svc/rm.h index 456b6a59d29b..31456f897aa9 100644 --- a/include/linux/firmware/imx/svc/rm.h +++ b/include/linux/firmware/imx/svc/rm.h @@ -59,11 +59,16 @@ enum imx_sc_rm_func { #if IS_ENABLED(CONFIG_IMX_SCU) bool imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource); +int imx_sc_rm_get_resource_owner(struct imx_sc_ipc *ipc, u16 resource, u8 *pt); #else static inline bool imx_sc_rm_is_resource_owned(struct imx_sc_ipc *ipc, u16 resource) { return true; } +static inline int imx_sc_rm_get_resource_owner(struct imx_sc_ipc *ipc, u16 resource, u8 *pt) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 907cb01890cf..f6783f58c64a 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -93,6 +93,7 @@ enum pm_api_id { PM_FPGA_LOAD = 22, PM_FPGA_GET_STATUS = 23, PM_GET_CHIPID = 24, + PM_SECURE_SHA = 26, PM_PINCTRL_REQUEST = 28, PM_PINCTRL_RELEASE = 29, PM_PINCTRL_GET_FUNCTION = 30, @@ -427,6 +428,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); int zynqmp_pm_write_ggs(u32 index, u32 value); @@ -601,6 +603,12 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) return -ENODEV; } +static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, + const u32 flags) +{ + return -ENODEV; +} + static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h new file mode 100644 index 000000000000..1c2bde0ead73 --- /dev/null +++ b/include/linux/fprobe.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Simple ftrace probe wrapper */ +#ifndef _LINUX_FPROBE_H +#define _LINUX_FPROBE_H + +#include <linux/compiler.h> +#include <linux/ftrace.h> +#include <linux/rethook.h> + +/** + * struct fprobe - ftrace based probe. + * @ops: The ftrace_ops. + * @nmissed: The counter for missing events. + * @flags: The status flag. + * @rethook: The rethook data structure. (internal data) + * @entry_handler: The callback function for function entry. + * @exit_handler: The callback function for function exit. + */ +struct fprobe { +#ifdef CONFIG_FUNCTION_TRACER + /* + * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. + * But user of fprobe may keep embedding the struct fprobe on their own + * code. To avoid build error, this will keep the fprobe data structure + * defined here, but remove ftrace_ops data structure. + */ + struct ftrace_ops ops; +#endif + unsigned long nmissed; + unsigned int flags; + struct rethook *rethook; + + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); + void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); +}; + +/* This fprobe is soft-disabled. */ +#define FPROBE_FL_DISABLED 1 + +/* + * This fprobe handler will be shared with kprobes. + * This flag must be set before registering. + */ +#define FPROBE_FL_KPROBE_SHARED 2 + +static inline bool fprobe_disabled(struct fprobe *fp) +{ + return (fp) ? fp->flags & FPROBE_FL_DISABLED : false; +} + +static inline bool fprobe_shared_with_kprobes(struct fprobe *fp) +{ + return (fp) ? fp->flags & FPROBE_FL_KPROBE_SHARED : false; +} + +#ifdef CONFIG_FPROBE +int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter); +int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); +int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); +int unregister_fprobe(struct fprobe *fp); +#else +static inline int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) +{ + return -EOPNOTSUPP; +} +static inline int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num) +{ + return -EOPNOTSUPP; +} +static inline int register_fprobe_syms(struct fprobe *fp, const char **syms, int num) +{ + return -EOPNOTSUPP; +} +static inline int unregister_fprobe(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} +#endif + +/** + * disable_fprobe() - Disable fprobe + * @fp: The fprobe to be disabled. + * + * This will soft-disable @fp. Note that this doesn't remove the ftrace + * hooks from the function entry. + */ +static inline void disable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags |= FPROBE_FL_DISABLED; +} + +/** + * enable_fprobe() - Enable fprobe + * @fp: The fprobe to be enabled. + * + * This will soft-enable @fp. + */ +static inline void enable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags &= ~FPROBE_FL_DISABLED; +} + +#endif diff --git a/include/linux/fs.h b/include/linux/fs.h index f3daaea16554..da3fc3cd2428 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -42,6 +42,7 @@ #include <linux/mount.h> #include <linux/cred.h> #include <linux/mnt_idmapping.h> +#include <linux/slab.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -367,8 +368,8 @@ struct address_space_operations { /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); - /* Set a page dirty. Return true if this dirtied it */ - int (*set_page_dirty)(struct page *page); + /* Mark a folio dirty. Return true if this dirtied it */ + bool (*dirty_folio)(struct address_space *, struct folio *); /* * Reads in the requested pages. Unlike ->readpage(), this is @@ -387,7 +388,7 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); - void (*invalidatepage) (struct page *, unsigned int, unsigned int); + void (*invalidate_folio) (struct folio *, size_t offset, size_t len); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); @@ -399,9 +400,9 @@ struct address_space_operations { struct page *, struct page *, enum migrate_mode); bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); - int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, unsigned long, - unsigned long); + int (*launder_folio)(struct folio *); + bool (*is_partially_uptodate) (struct folio *, size_t from, + size_t count); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); @@ -930,10 +931,15 @@ struct fown_struct { * struct file_ra_state - Track a file's readahead state. * @start: Where the most recent readahead started. * @size: Number of pages read in the most recent readahead. - * @async_size: Start next readahead when this many pages are left. - * @ra_pages: Maximum size of a readahead request. + * @async_size: Numer of pages that were/are not needed immediately + * and so were/are genuinely "ahead". Start next readahead when + * the first of these pages is accessed. + * @ra_pages: Maximum size of a readahead request, copied from the bdi. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. + * + * When this structure is passed to ->readahead(), the "most recent" + * readahead means the current readahead. */ struct file_ra_state { pgoff_t start; @@ -1435,6 +1441,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ +#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ /* Possible states of 'frozen' field */ enum { @@ -1483,7 +1490,7 @@ struct super_block { #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif @@ -2746,54 +2753,6 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -unsigned long invalidate_mapping_pages(struct address_space *mapping, - pgoff_t start, pgoff_t end); - -void invalidate_mapping_pagevec(struct address_space *mapping, - pgoff_t start, pgoff_t end, - unsigned long *nr_pagevec); - -static inline void invalidate_remote_inode(struct inode *inode) -{ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) - invalidate_mapping_pages(inode->i_mapping, 0, -1); -} -extern int invalidate_inode_pages2(struct address_space *mapping); -extern int invalidate_inode_pages2_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -extern int write_inode_now(struct inode *, int); -extern int filemap_fdatawrite(struct address_space *); -extern int filemap_flush(struct address_space *); -extern int filemap_fdatawait_keep_errors(struct address_space *mapping); -extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, - loff_t lend); -extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping, - loff_t start_byte, loff_t end_byte); - -static inline int filemap_fdatawait(struct address_space *mapping) -{ - return filemap_fdatawait_range(mapping, 0, LLONG_MAX); -} - -extern bool filemap_range_has_page(struct address_space *, loff_t lstart, - loff_t lend); -extern int filemap_write_and_wait_range(struct address_space *mapping, - loff_t lstart, loff_t lend); -extern int __filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end, int sync_mode); -extern int filemap_fdatawrite_range(struct address_space *mapping, - loff_t start, loff_t end); -extern int filemap_check_errors(struct address_space *mapping); -extern void __filemap_set_wb_err(struct address_space *mapping, int err); -int filemap_fdatawrite_wbc(struct address_space *mapping, - struct writeback_control *wbc); - -static inline int filemap_write_and_wait(struct address_space *mapping) -{ - return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); -} - extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); @@ -2805,67 +2764,6 @@ static inline int file_write_and_wait(struct file *file) return file_write_and_wait_range(file, 0, LLONG_MAX); } -/** - * filemap_set_wb_err - set a writeback error on an address_space - * @mapping: mapping in which to set writeback error - * @err: error to be set in mapping - * - * When writeback fails in some way, we must record that error so that - * userspace can be informed when fsync and the like are called. We endeavor - * to report errors on any file that was open at the time of the error. Some - * internal callers also need to know when writeback errors have occurred. - * - * When a writeback error occurs, most filesystems will want to call - * filemap_set_wb_err to record the error in the mapping so that it will be - * automatically reported whenever fsync is called on the file. - */ -static inline void filemap_set_wb_err(struct address_space *mapping, int err) -{ - /* Fastpath for common case of no error */ - if (unlikely(err)) - __filemap_set_wb_err(mapping, err); -} - -/** - * filemap_check_wb_err - has an error occurred since the mark was sampled? - * @mapping: mapping to check for writeback errors - * @since: previously-sampled errseq_t - * - * Grab the errseq_t value from the mapping, and see if it has changed "since" - * the given value was sampled. - * - * If it has then report the latest error set, otherwise return 0. - */ -static inline int filemap_check_wb_err(struct address_space *mapping, - errseq_t since) -{ - return errseq_check(&mapping->wb_err, since); -} - -/** - * filemap_sample_wb_err - sample the current errseq_t to test for later errors - * @mapping: mapping to be sampled - * - * Writeback errors are always reported relative to a particular sample point - * in the past. This function provides those sample points. - */ -static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) -{ - return errseq_sample(&mapping->wb_err); -} - -/** - * file_sample_sb_err - sample the current errseq_t to test for later errors - * @file: file pointer to be sampled - * - * Grab the most current superblock-level errseq_t value for the given - * struct file. - */ -static inline errseq_t file_sample_sb_err(struct file *file) -{ - return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); -} - extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); @@ -3108,6 +3006,16 @@ extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); +/* + * This must be used for allocating filesystems specific inodes to set + * up the inode reclaim context correctly. + */ +static inline void * +alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) +{ + return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); +} + extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { @@ -3130,6 +3038,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); @@ -3173,6 +3082,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); +int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); @@ -3322,8 +3232,6 @@ extern int simple_rename(struct user_namespace *, struct inode *, extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); -extern void noop_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, @@ -3608,15 +3516,4 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); -/* - * Flush file data before changing attributes. Caller must hold any locks - * required to prevent further writes to this file until we're done setting - * flags. - */ -static inline int inode_drain_writes(struct inode *inode) -{ - inode_dio_wait(inode); - return filemap_write_and_wait(inode->i_mapping); -} - #endif /* _LINUX_FS_H */ diff --git a/include/linux/fs_api.h b/include/linux/fs_api.h new file mode 100644 index 000000000000..83be38d6d413 --- /dev/null +++ b/include/linux/fs_api.h @@ -0,0 +1 @@ +#include <linux/fs.h> diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 296c5f1d9f35..d44ff747a657 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -616,9 +616,11 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } #if __fscache_available -extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie); +bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, + struct fscache_cookie *cookie); #else -#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE))) +#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \ + filemap_dirty_folio(MAPPING, FOLIO) #endif /** @@ -626,7 +628,7 @@ extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cook * @wbc: The writeback control * @cookie: The cookie referring to the cache object * - * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is + * Unpin the writeback resources pinned by fscache_dirty_folio(). This is * intended to be called by the netfs's ->write_inode() method. */ static inline void fscache_unpin_writeback(struct writeback_control *wbc, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 91ea9477e9bd..50d92d805bd8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -714,6 +714,10 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); +bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); + +u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); + #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) @@ -742,6 +746,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, { return true; } + +static inline bool fscrypt_dio_supported(struct kiocb *iocb, + struct iov_iter *iter) +{ + const struct inode *inode = file_inode(iocb->ki_filp); + + return !fscrypt_needs_contents_encryption(inode); +} + +static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, + u64 nr_blocks) +{ + return nr_blocks; +} #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3a2d7dc3c607..bb8467cd11ae 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -225,16 +225,53 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, } /* + * fsnotify_delete - @dentry was unlinked and unhashed + * + * Caller must make sure that dentry->d_name is stable. + * + * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode + * as this may be called after d_delete() and old_dentry may be negative. + */ +static inline void fsnotify_delete(struct inode *dir, struct inode *inode, + struct dentry *dentry) +{ + __u32 mask = FS_DELETE; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, + 0); +} + +/** + * d_delete_notify - delete a dentry and call fsnotify_delete() + * @dentry: The dentry to delete + * + * This helper is used to guaranty that the unlinked inode cannot be found + * by lookup of this name after fsnotify_delete() event has been delivered. + */ +static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + ihold(inode); + d_delete(dentry); + fsnotify_delete(dir, inode, dentry); + iput(inode); +} + +/* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* @@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9999e29187de..ed8cf433a46a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,6 +30,12 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_TRACING +extern void ftrace_boot_snapshot(void); +#else +static inline void ftrace_boot_snapshot(void) { } +#endif + #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops; struct ftrace_regs; @@ -215,7 +221,10 @@ struct ftrace_ops_hash { void ftrace_free_init_mem(void); void ftrace_free_mem(struct module *mod, void *start, void *end); #else -static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_init_mem(void) +{ + ftrace_boot_snapshot(); +} static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif @@ -512,6 +521,8 @@ struct dyn_ftrace { int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); +int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, + unsigned int cnt, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -802,6 +813,7 @@ static inline unsigned long ftrace_location(unsigned long ip) #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) #define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) +#define ftrace_set_filter_ips(ops, ips, cnt, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) diff --git a/include/linux/genhd.h b/include/linux/genhd.h deleted file mode 100644 index 6906a45bc761..000000000000 --- a/include/linux/genhd.h +++ /dev/null @@ -1,291 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GENHD_H -#define _LINUX_GENHD_H - -/* - * genhd.h Copyright (C) 1992 Drew Eckhardt - * Generic hard disk header file by - * Drew Eckhardt - * - * <drew@colorado.edu> - */ - -#include <linux/types.h> -#include <linux/kdev_t.h> -#include <linux/uuid.h> -#include <linux/blk_types.h> -#include <linux/device.h> -#include <linux/xarray.h> - -extern const struct device_type disk_type; -extern struct device_type part_type; -extern struct class block_class; - -#define DISK_MAX_PARTS 256 -#define DISK_NAME_LEN 32 - -#define PARTITION_META_INFO_VOLNAMELTH 64 -/* - * Enough for the string representation of any kind of UUID plus NULL. - * EFI UUID is 36 characters. MSDOS UUID is 11 characters. - */ -#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) - -struct partition_meta_info { - char uuid[PARTITION_META_INFO_UUIDLTH]; - u8 volname[PARTITION_META_INFO_VOLNAMELTH]; -}; - -/** - * DOC: genhd capability flags - * - * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to - * removable media. When set, the device remains present even when media is not - * inserted. Shall not be set for devices which are removed entirely when the - * media is removed. - * - * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, - * doesn't appear in sysfs, and can't be opened from userspace or using - * blkdev_get*. Used for the underlying components of multipath devices. - * - * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not - * scan for partitions from add_disk, and users can't add partitions manually. - * - */ -enum { - GENHD_FL_REMOVABLE = 1 << 0, - GENHD_FL_HIDDEN = 1 << 1, - GENHD_FL_NO_PART = 1 << 2, -}; - -enum { - DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ - DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ -}; - -enum { - /* Poll even if events_poll_msecs is unset */ - DISK_EVENT_FLAG_POLL = 1 << 0, - /* Forward events to udev */ - DISK_EVENT_FLAG_UEVENT = 1 << 1, - /* Block event polling when open for exclusive write */ - DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, -}; - -struct disk_events; -struct badblocks; - -struct blk_integrity { - const struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -struct gendisk { - /* - * major/first_minor/minors should not be set by any new driver, the - * block core will take care of allocating them automatically. - */ - int major; - int first_minor; - int minors; - - char disk_name[DISK_NAME_LEN]; /* name of major driver */ - - unsigned short events; /* supported events */ - unsigned short event_flags; /* flags related to event processing */ - - struct xarray part_tbl; - struct block_device *part0; - - const struct block_device_operations *fops; - struct request_queue *queue; - void *private_data; - - int flags; - unsigned long state; -#define GD_NEED_PART_SCAN 0 -#define GD_READ_ONLY 1 -#define GD_DEAD 2 -#define GD_NATIVE_CAPACITY 3 - - struct mutex open_mutex; /* open/close mutex */ - unsigned open_partitions; /* number of open partitions */ - - struct backing_dev_info *bdi; - struct kobject *slave_dir; -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED - struct list_head slave_bdevs; -#endif - struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ - struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ -#if IS_ENABLED(CONFIG_CDROM) - struct cdrom_device_info *cdi; -#endif - int node_id; - struct badblocks *bb; - struct lockdep_map lockdep_map; - u64 diskseq; -}; - -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - -/* - * The gendisk is refcounted by the part0 block_device, and the bd_device - * therein is also used for device model presentation in sysfs. - */ -#define dev_to_disk(device) \ - (dev_to_bdev(device)->bd_disk) -#define disk_to_dev(disk) \ - (&((disk)->part0->bd_device)) - -#if IS_REACHABLE(CONFIG_CDROM) -#define disk_to_cdi(disk) ((disk)->cdi) -#else -#define disk_to_cdi(disk) NULL -#endif - -static inline dev_t disk_devt(struct gendisk *disk) -{ - return MKDEV(disk->major, disk->first_minor); -} - -void disk_uevent(struct gendisk *disk, enum kobject_action action); - -/* block/genhd.c */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline int __must_check add_disk(struct gendisk *disk) -{ - return device_add_disk(NULL, disk, NULL); -} -extern void del_gendisk(struct gendisk *gp); - -void invalidate_disk(struct gendisk *disk); - -void set_disk_ro(struct gendisk *disk, bool read_only); - -static inline int get_disk_ro(struct gendisk *disk) -{ - return disk->part0->bd_read_only || - test_bit(GD_READ_ONLY, &disk->state); -} - -static inline int bdev_read_only(struct block_device *bdev) -{ - return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); -} - -extern void disk_block_events(struct gendisk *disk); -extern void disk_unblock_events(struct gendisk *disk); -extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); - -/* drivers/char/random.c */ -extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -extern void rand_initialize_disk(struct gendisk *disk); - -static inline sector_t get_start_sect(struct block_device *bdev) -{ - return bdev->bd_start_sect; -} - -static inline sector_t bdev_nr_sectors(struct block_device *bdev) -{ - return bdev->bd_nr_sectors; -} - -static inline loff_t bdev_nr_bytes(struct block_device *bdev) -{ - return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; -} - -static inline sector_t get_capacity(struct gendisk *disk) -{ - return bdev_nr_sectors(disk->part0); -} - -static inline u64 sb_bdev_nr_blocks(struct super_block *sb) -{ - return bdev_nr_sectors(sb->s_bdev) >> - (sb->s_blocksize_bits - SECTOR_SHIFT); -} - -int bdev_disk_changed(struct gendisk *disk, bool invalidate); -void blk_drop_partitions(struct gendisk *disk); - -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); -extern void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); - -/** - * blk_alloc_disk - allocate a gendisk structure - * @node_id: numa node to allocate on - * - * Allocate and pre-initialize a gendisk structure for use with BIO based - * drivers. - * - * Context: can sleep - */ -#define blk_alloc_disk(node_id) \ -({ \ - static struct lock_class_key __key; \ - \ - __blk_alloc_disk(node_id, &__key); \ -}) -void blk_cleanup_disk(struct gendisk *disk); - -int __register_blkdev(unsigned int major, const char *name, - void (*probe)(dev_t devt)); -#define register_blkdev(major, name) \ - __register_blkdev(major, name, NULL) -void unregister_blkdev(unsigned int major, const char *name); - -bool bdev_check_media_change(struct block_device *bdev); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void set_capacity(struct gendisk *disk, sector_t size); - -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED -int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} -#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ - -dev_t part_devt(struct gendisk *disk, u8 partno); -void inc_diskseq(struct gendisk *disk); -dev_t blk_lookup_devt(const char *name, int partno); -void blk_request_module(dev_t devt); -#ifdef CONFIG_BLOCK -void printk_all_partitions(void); -#else /* CONFIG_BLOCK */ -static inline void printk_all_partitions(void) -{ -} -#endif /* CONFIG_BLOCK */ - -#endif /* _LINUX_GENHD_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 80f63c862be5..0fa17fb85de5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -54,9 +54,17 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u -#define ___GFP_SKIP_KASAN_POISON 0x1000000u +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u +#define ___GFP_SKIP_KASAN_POISON 0x4000000u +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN_UNPOISON 0 +#define ___GFP_SKIP_KASAN_POISON 0 +#endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x2000000u +#define ___GFP_NOLOCKDEP 0x8000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -79,7 +87,7 @@ struct vm_area_struct; * DOC: Page mobility and placement hints * * Page mobility and placement hints - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * --------------------------------- * * These flags provide hints about how mobile the page is. Pages with similar * mobility are placed within the same pageblocks to minimise problems due @@ -112,7 +120,7 @@ struct vm_area_struct; * DOC: Watermark modifiers * * Watermark modifiers -- controls access to emergency reserves - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * ------------------------------------------------------------ * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. @@ -144,7 +152,7 @@ struct vm_area_struct; * DOC: Reclaim modifiers * * Reclaim modifiers - * ~~~~~~~~~~~~~~~~~ + * ----------------- * Please note that all the following flags are only applicable to sleepable * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). * @@ -224,7 +232,7 @@ struct vm_area_struct; * DOC: Action modifiers * * Action modifiers - * ~~~~~~~~~~~~~~~~ + * ---------------- * * %__GFP_NOWARN suppresses allocation failure reports. * @@ -232,31 +240,40 @@ struct vm_area_struct; * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if - * __GFP_ZERO is set. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performace impact. + * + * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. + * Only effective in HW_TAGS mode. * - * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned - * on deallocation. Typically used for userspace pages. Currently only has an - * effect in HW tags mode. + * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. + * Typically, used for userspace pages. Only effective in HW_TAGS mode. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (24 + \ + 3 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** * DOC: Useful GFP flag combinations * * Useful GFP flag combinations - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * ---------------------------- * * Useful GFP flag combinations that are commonly used. It is recommended * that subsystems start with one of these combinations and then set/clear diff --git a/include/linux/gfp_api.h b/include/linux/gfp_api.h new file mode 100644 index 000000000000..5a05a2764a86 --- /dev/null +++ b/include/linux/gfp_api.h @@ -0,0 +1 @@ +#include <linux/gfp.h> diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3ad67b4a72be..c3aa8b330e1c 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -8,27 +8,16 @@ #include <linux/err.h> struct device; - -/** - * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are - * preferable to the old integer-based handles. - * - * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid - * until the GPIO is released. - */ struct gpio_desc; - -/** - * Opaque descriptor for a structure of GPIO array attributes. This structure - * is attached to struct gpiod_descs obtained from gpiod_get_array() and can be - * passed back to get/set array functions in order to activate fast processing - * path if applicable. - */ struct gpio_array; /** - * Struct containing an array of descriptors that can be obtained using - * gpiod_get_array(). + * struct gpio_descs - Struct containing an array of descriptors that can be + * obtained using gpiod_get_array() + * + * @info: Pointer to the opaque gpio_array structure + * @ndescs: Number of held descriptors + * @desc: Array of pointers to GPIO descriptors */ struct gpio_descs { struct gpio_array *info; @@ -43,8 +32,16 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** - * Optional flags that can be passed to one of gpiod_* to configure direction - * and output value. These values cannot be OR'd. + * enum gpiod_flags - Optional flags that can be passed to one of gpiod_* to + * configure direction and output value. These values + * cannot be OR'd. + * + * @GPIOD_ASIS: Don't change anything + * @GPIOD_IN: Set lines to input mode + * @GPIOD_OUT_LOW: Set lines to output and drive them low + * @GPIOD_OUT_HIGH: Set lines to output and drive them high + * @GPIOD_OUT_LOW_OPEN_DRAIN: Set lines to open-drain output and drive them low + * @GPIOD_OUT_HIGH_OPEN_DRAIN: Set lines to open-drain output and drive them high */ enum gpiod_flags { GPIOD_ASIS = 0, diff --git a/include/linux/greybus/greybus_manifest.h b/include/linux/greybus/greybus_manifest.h index 6e62fe478712..bef9eb2093e9 100644 --- a/include/linux/greybus/greybus_manifest.h +++ b/include/linux/greybus/greybus_manifest.h @@ -100,7 +100,7 @@ enum { struct greybus_descriptor_string { __u8 length; __u8 id; - __u8 string[0]; + __u8 string[]; } __packed; /* @@ -175,7 +175,7 @@ struct greybus_manifest_header { struct greybus_manifest { struct greybus_manifest_header header; - struct greybus_descriptor descriptors[0]; + struct greybus_descriptor descriptors[]; } __packed; #endif /* __GREYBUS_MANIFEST_H */ diff --git a/include/linux/greybus/hd.h b/include/linux/greybus/hd.h index d3faf0c1a569..718e2857054e 100644 --- a/include/linux/greybus/hd.h +++ b/include/linux/greybus/hd.h @@ -58,7 +58,7 @@ struct gb_host_device { struct gb_svc *svc; /* Private data for the host driver */ - unsigned long hd_priv[0] __aligned(sizeof(s64)); + unsigned long hd_priv[] __aligned(sizeof(s64)); }; #define to_gb_host_device(d) container_of(d, struct gb_host_device, dev) diff --git a/include/linux/greybus/module.h b/include/linux/greybus/module.h index 47b839af145d..3efe2133acfd 100644 --- a/include/linux/greybus/module.h +++ b/include/linux/greybus/module.h @@ -23,7 +23,7 @@ struct gb_module { bool disconnected; - struct gb_interface *interfaces[0]; + struct gb_interface *interfaces[]; }; #define to_gb_module(d) container_of(d, struct gb_module, dev) diff --git a/include/linux/hashtable_api.h b/include/linux/hashtable_api.h new file mode 100644 index 000000000000..c268ac2c5c0e --- /dev/null +++ b/include/linux/hashtable_api.h @@ -0,0 +1 @@ +#include <linux/hashtable.h> diff --git a/include/linux/hid.h b/include/linux/hid.h index 7487b0586fe6..4363a63b9775 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -342,12 +342,12 @@ struct hid_item { * HID device quirks. */ -/* +/* * Increase this if you need to configure more HID quirks at module load time */ #define MAX_USBHID_BOOT_QUIRKS 4 -#define HID_QUIRK_INVERT BIT(0) +/* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */ #define HID_QUIRK_NOTOUCH BIT(1) #define HID_QUIRK_IGNORE BIT(2) #define HID_QUIRK_NOGET BIT(3) @@ -476,31 +476,50 @@ struct hid_field { unsigned report_count; /* number of this field in the report */ unsigned report_type; /* (input,output,feature) */ __s32 *value; /* last known value(s) */ + __s32 *new_value; /* newly read value(s) */ + __s32 *usages_priorities; /* priority of each usage when reading the report + * bits 8-16 are reserved for hid-input usage + */ __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; __s32 physical_maximum; __s32 unit_exponent; unsigned unit; + bool ignored; /* this field is ignored in this event */ struct hid_report *report; /* associated report */ unsigned index; /* index into report->field[] */ /* hidinput data */ struct hid_input *hidinput; /* associated input structure */ __u16 dpad; /* dpad input code */ + unsigned int slot_idx; /* slot index in a report */ }; #define HID_MAX_FIELDS 256 +struct hid_field_entry { + struct list_head list; + struct hid_field *field; + unsigned int index; + __s32 priority; +}; + struct hid_report { struct list_head list; struct list_head hidinput_list; + struct list_head field_entry_list; /* ordered list of input fields */ unsigned int id; /* id of this report */ unsigned int type; /* report type */ unsigned int application; /* application usage for this report */ struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */ + struct hid_field_entry *field_entries; /* allocated memory of input field_entry */ unsigned maxfield; /* maximum valid field index */ unsigned size; /* size of the report (bits) */ struct hid_device *device; /* associated device */ + + /* tool related state */ + bool tool_active; /* whether the current tool is active */ + unsigned int tool; /* BTN_TOOL_* */ }; #define HID_MAX_IDS 256 diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 0a0b2b09b1b8..a77be5630209 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -246,6 +246,16 @@ do { \ __kunmap_atomic(__addr); \ } while (0) +/** + * kunmap_local - Unmap a page mapped via kmap_local_page(). + * @__addr: An address within the page mapped + * + * @__addr can be any address within the mapped page. Commonly it is the + * address return from kmap_local_page(), but it can also include offsets. + * + * Unmapping should be done in the reverse order of the mapping. See + * kmap_local_page() for details. + */ #define kunmap_local(__addr) \ do { \ BUILD_BUG_ON(__same_type((__addr), struct page *)); \ diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h new file mode 100644 index 000000000000..177f7b7cd414 --- /dev/null +++ b/include/linux/hisi_acc_qm.h @@ -0,0 +1,490 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 HiSilicon Limited. */ +#ifndef HISI_ACC_QM_H +#define HISI_ACC_QM_H + +#include <linux/bitfield.h> +#include <linux/debugfs.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/pci.h> + +#define QM_QNUM_V1 4096 +#define QM_QNUM_V2 1024 +#define QM_MAX_VFS_NUM_V2 63 + +/* qm user domain */ +#define QM_ARUSER_M_CFG_1 0x100088 +#define AXUSER_SNOOP_ENABLE BIT(30) +#define AXUSER_CMD_TYPE GENMASK(14, 12) +#define AXUSER_CMD_SMMU_NORMAL 1 +#define AXUSER_NS BIT(6) +#define AXUSER_NO BIT(5) +#define AXUSER_FP BIT(4) +#define AXUSER_SSV BIT(0) +#define AXUSER_BASE (AXUSER_SNOOP_ENABLE | \ + FIELD_PREP(AXUSER_CMD_TYPE, \ + AXUSER_CMD_SMMU_NORMAL) | \ + AXUSER_NS | AXUSER_NO | AXUSER_FP) +#define QM_ARUSER_M_CFG_ENABLE 0x100090 +#define ARUSER_M_CFG_ENABLE 0xfffffffe +#define QM_AWUSER_M_CFG_1 0x100098 +#define QM_AWUSER_M_CFG_ENABLE 0x1000a0 +#define AWUSER_M_CFG_ENABLE 0xfffffffe +#define QM_WUSER_M_CFG_ENABLE 0x1000a8 +#define WUSER_M_CFG_ENABLE 0xffffffff + +/* mailbox */ +#define QM_MB_CMD_SQC 0x0 +#define QM_MB_CMD_CQC 0x1 +#define QM_MB_CMD_EQC 0x2 +#define QM_MB_CMD_AEQC 0x3 +#define QM_MB_CMD_SQC_BT 0x4 +#define QM_MB_CMD_CQC_BT 0x5 +#define QM_MB_CMD_SQC_VFT_V2 0x6 +#define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_SRC 0xc +#define QM_MB_CMD_DST 0xd + +#define QM_MB_CMD_SEND_BASE 0x300 +#define QM_MB_EVENT_SHIFT 8 +#define QM_MB_BUSY_SHIFT 13 +#define QM_MB_OP_SHIFT 14 +#define QM_MB_CMD_DATA_ADDR_L 0x304 +#define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_MAX_WAIT_CNT 6000 + +/* doorbell */ +#define QM_DOORBELL_CMD_SQ 0 +#define QM_DOORBELL_CMD_CQ 1 +#define QM_DOORBELL_CMD_EQ 2 +#define QM_DOORBELL_CMD_AEQ 3 + +#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 +#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 +#define QM_QP_MAX_NUM_SHIFT 11 +#define QM_DB_CMD_SHIFT_V2 12 +#define QM_DB_RAND_SHIFT_V2 16 +#define QM_DB_INDEX_SHIFT_V2 32 +#define QM_DB_PRIORITY_SHIFT_V2 48 +#define QM_VF_STATE 0x60 + +/* qm cache */ +#define QM_CACHE_CTL 0x100050 +#define SQC_CACHE_ENABLE BIT(0) +#define CQC_CACHE_ENABLE BIT(1) +#define SQC_CACHE_WB_ENABLE BIT(4) +#define SQC_CACHE_WB_THRD GENMASK(10, 5) +#define CQC_CACHE_WB_ENABLE BIT(11) +#define CQC_CACHE_WB_THRD GENMASK(17, 12) +#define QM_AXI_M_CFG 0x1000ac +#define AXI_M_CFG 0xffff +#define QM_AXI_M_CFG_ENABLE 0x1000b0 +#define AM_CFG_SINGLE_PORT_MAX_TRANS 0x300014 +#define AXI_M_CFG_ENABLE 0xffffffff +#define QM_PEH_AXUSER_CFG 0x1000cc +#define QM_PEH_AXUSER_CFG_ENABLE 0x1000d0 +#define PEH_AXUSER_CFG 0x401001 +#define PEH_AXUSER_CFG_ENABLE 0xffffffff + +#define QM_AXI_RRESP BIT(0) +#define QM_AXI_BRESP BIT(1) +#define QM_ECC_MBIT BIT(2) +#define QM_ECC_1BIT BIT(3) +#define QM_ACC_GET_TASK_TIMEOUT BIT(4) +#define QM_ACC_DO_TASK_TIMEOUT BIT(5) +#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6) +#define QM_SQ_CQ_VF_INVALID BIT(7) +#define QM_CQ_VF_INVALID BIT(8) +#define QM_SQ_VF_INVALID BIT(9) +#define QM_DB_TIMEOUT BIT(10) +#define QM_OF_FIFO_OF BIT(11) +#define QM_DB_RANDOM_INVALID BIT(12) +#define QM_MAILBOX_TIMEOUT BIT(13) +#define QM_FLR_TIMEOUT BIT(14) + +#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \ + QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \ + QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \ + QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT) +#define QM_BASE_CE QM_ECC_1BIT + +#define QM_Q_DEPTH 1024 +#define QM_MIN_QNUM 2 +#define HISI_ACC_SGL_SGE_NR_MAX 255 +#define QM_SHAPER_CFG 0x100164 +#define QM_SHAPER_ENABLE BIT(30) +#define QM_SHAPER_TYPE1_OFFSET 10 + +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + +/* uacce mode of the driver */ +#define UACCE_MODE_NOUACCE 0 /* don't use uacce */ +#define UACCE_MODE_SVA 1 /* use uacce sva mode */ +#define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" + +enum qm_stop_reason { + QM_NORMAL, + QM_SOFT_RESET, + QM_FLR, +}; + +enum qm_state { + QM_INIT = 0, + QM_START, + QM_CLOSE, + QM_STOP, +}; + +enum qp_state { + QP_INIT = 1, + QP_START, + QP_STOP, + QP_CLOSE, +}; + +enum qm_hw_ver { + QM_HW_UNKNOWN = -1, + QM_HW_V1 = 0x20, + QM_HW_V2 = 0x21, + QM_HW_V3 = 0x30, +}; + +enum qm_fun_type { + QM_HW_PF, + QM_HW_VF, +}; + +enum qm_debug_file { + CURRENT_QM, + CURRENT_Q, + CLEAR_ENABLE, + DEBUG_FILE_NUM, +}; + +enum qm_vf_state { + QM_READY = 0, + QM_NOT_READY, +}; + +struct qm_dfx { + atomic64_t err_irq_cnt; + atomic64_t aeq_irq_cnt; + atomic64_t abnormal_irq_cnt; + atomic64_t create_qp_err_cnt; + atomic64_t mb_err_cnt; +}; + +struct debugfs_file { + enum qm_debug_file index; + struct mutex lock; + struct qm_debug *debug; +}; + +struct qm_debug { + u32 curr_qm_qp_num; + u32 sqe_mask_offset; + u32 sqe_mask_len; + struct qm_dfx dfx; + struct dentry *debug_root; + struct dentry *qm_d; + struct debugfs_file files[DEBUG_FILE_NUM]; +}; + +struct qm_shaper_factor { + u32 func_qos; + u64 cir_b; + u64 cir_u; + u64 cir_s; + u64 cbs_s; +}; + +struct qm_dma { + void *va; + dma_addr_t dma; + size_t size; +}; + +struct hisi_qm_status { + u32 eq_head; + bool eqc_phase; + u32 aeq_head; + bool aeqc_phase; + atomic_t flags; + int stop_reason; +}; + +struct hisi_qm; + +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + u32 ecc_2bits_mask; + u32 dev_ce_mask; + u32 ce; + u32 nfe; + u32 fe; +}; + +struct hisi_qm_err_status { + u32 is_qm_ecc_mbit; + u32 is_dev_ecc_mbit; +}; + +struct hisi_qm_err_ini { + int (*hw_init)(struct hisi_qm *qm); + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts); + void (*open_axi_master_ooo)(struct hisi_qm *qm); + void (*close_axi_master_ooo)(struct hisi_qm *qm); + void (*open_sva_prefetch)(struct hisi_qm *qm); + void (*close_sva_prefetch)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + void (*err_info_init)(struct hisi_qm *qm); +}; + +struct hisi_qm_list { + struct mutex lock; + struct list_head list; + int (*register_to_crypto)(struct hisi_qm *qm); + void (*unregister_from_crypto)(struct hisi_qm *qm); +}; + +struct hisi_qm { + enum qm_hw_ver ver; + enum qm_fun_type fun_type; + const char *dev_name; + struct pci_dev *pdev; + void __iomem *io_base; + void __iomem *db_io_base; + u32 sqe_size; + u32 qp_base; + u32 qp_num; + u32 qp_in_used; + u32 ctrl_qp_num; + u32 max_qp_num; + u32 vfs_num; + u32 db_interval; + struct list_head list; + struct hisi_qm_list *qm_list; + + struct qm_dma qdma; + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqe *eqe; + struct qm_aeqe *aeqe; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqe_dma; + dma_addr_t aeqe_dma; + + struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; + struct hisi_qm_err_info err_info; + struct hisi_qm_err_status err_status; + unsigned long misc_ctl; /* driver removing and reset sched */ + + struct rw_semaphore qps_lock; + struct idr qp_idr; + struct hisi_qp *qp_array; + + struct mutex mailbox_lock; + + const struct hisi_qm_hw_ops *ops; + + struct qm_debug debug; + + u32 error_mask; + + struct workqueue_struct *wq; + struct work_struct work; + struct work_struct rst_work; + struct work_struct cmd_process; + + const char *algs; + bool use_sva; + bool is_frozen; + + /* doorbell isolation enable */ + bool use_db_isolation; + resource_size_t phys_base; + resource_size_t db_phys_base; + struct uacce_device *uacce; + int mode; + struct qm_shaper_factor *factor; + u32 mb_qos; + u32 type_rate; +}; + +struct hisi_qp_status { + atomic_t used; + u16 sq_tail; + u16 cq_head; + bool cqc_phase; + atomic_t flags; +}; + +struct hisi_qp_ops { + int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm); +}; + +struct hisi_qp { + u32 qp_id; + u8 alg_type; + u8 req_type; + + struct qm_dma qdma; + void *sqe; + struct qm_cqe *cqe; + dma_addr_t sqe_dma; + dma_addr_t cqe_dma; + + struct hisi_qp_status qp_status; + struct hisi_qp_ops *hw_ops; + void *qp_ctx; + void (*req_cb)(struct hisi_qp *qp, void *data); + void (*event_cb)(struct hisi_qp *qp); + + struct hisi_qm *qm; + bool is_resetting; + bool is_in_kernel; + u16 pasid; + struct uacce_queue *uacce_q; +}; + +static inline int q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device) +{ + struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, + device, NULL); + u32 n, q_num; + int ret; + + if (!val) + return -EINVAL; + + if (!pdev) { + q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); + pr_info("No device found currently, suppose queue number is %u\n", + q_num); + } else { + if (pdev->revision == QM_HW_V1) + q_num = QM_QNUM_V1; + else + q_num = QM_QNUM_V2; + } + + ret = kstrtou32(val, 10, &n); + if (ret || n < QM_MIN_QNUM || n > q_num) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int vfs_num_set(const char *val, const struct kernel_param *kp) +{ + u32 n; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtou32(val, 10, &n); + if (ret < 0) + return ret; + + if (n > QM_MAX_VFS_NUM_V2) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int mode_set(const char *val, const struct kernel_param *kp) +{ + u32 n; + int ret; + + if (!val) + return -EINVAL; + + ret = kstrtou32(val, 10, &n); + if (ret != 0 || (n != UACCE_MODE_SVA && + n != UACCE_MODE_NOUACCE)) + return -EINVAL; + + return param_set_int(val, kp); +} + +static inline int uacce_mode_set(const char *val, const struct kernel_param *kp) +{ + return mode_set(val, kp); +} + +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +int hisi_qm_init(struct hisi_qm *qm); +void hisi_qm_uninit(struct hisi_qm *qm); +int hisi_qm_start(struct hisi_qm *qm); +int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); +struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type); +int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); +int hisi_qm_stop_qp(struct hisi_qp *qp); +void hisi_qm_release_qp(struct hisi_qp *qp); +int hisi_qp_send(struct hisi_qp *qp, const void *msg); +int hisi_qm_get_free_qp_num(struct hisi_qm *qm); +int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); +void hisi_qm_debug_init(struct hisi_qm *qm); +enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); +void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); +int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); +int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); +void hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); +pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); +void hisi_qm_reset_prepare(struct pci_dev *pdev); +void hisi_qm_reset_done(struct pci_dev *pdev); + +int hisi_qm_wait_mb_ready(struct hisi_qm *qm); +int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op); + +struct hisi_acc_sgl_pool; +struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, + struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, + u32 index, dma_addr_t *hw_sgl_dma); +void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl, + struct hisi_acc_hw_sgl *hw_sgl); +struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, + u32 count, u32 sge_nr); +void hisi_acc_free_sgl_pool(struct device *dev, + struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); +void hisi_qm_dev_shutdown(struct pci_dev *pdev); +void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_resume(struct device *dev); +int hisi_qm_suspend(struct device *dev); +void hisi_qm_pm_uninit(struct hisi_qm *qm); +void hisi_qm_pm_init(struct hisi_qm *qm); +int hisi_qm_get_dfx_access(struct hisi_qm *qm); +void hisi_qm_put_dfx_access(struct hisi_qm *qm); +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); + +/* Used by VFIO ACC live migration driver */ +struct pci_driver *hisi_sec_get_pf_driver(void); +struct pci_driver *hisi_hpre_get_pf_driver(void); +struct pci_driver *hisi_zip_get_pf_driver(void); +#endif diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 2fd2e91d5107..d5a6f101f843 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -9,14 +9,9 @@ #ifndef LINUX_HMM_H #define LINUX_HMM_H -#include <linux/kconfig.h> -#include <linux/pgtable.h> +#include <linux/mm.h> -#include <linux/device.h> -#include <linux/migrate.h> -#include <linux/memremap.h> -#include <linux/completion.h> -#include <linux/mmu_notifier.h> +struct mmu_interval_notifier; /* * On output: diff --git a/include/linux/hrtimer_api.h b/include/linux/hrtimer_api.h new file mode 100644 index 000000000000..8d9700894468 --- /dev/null +++ b/include/linux/hrtimer_api.h @@ -0,0 +1 @@ +#include <linux/hrtimer.h> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e4c18ba8d3bf..2999190adc22 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -183,9 +183,8 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, void prep_transhuge_page(struct page *page); void free_transhuge_page(struct page *page); -bool is_transparent_hugepage(struct page *page); -bool can_split_huge_page(struct page *page, int *pextra_pins); +bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { @@ -194,7 +193,7 @@ static inline int split_huge_page(struct page *page) void deferred_split_huge_page(struct page *page); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct page *page); + unsigned long address, bool freeze, struct folio *folio); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ @@ -207,7 +206,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, - bool freeze, struct page *page); + bool freeze, struct folio *folio); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); @@ -251,30 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, } /** - * thp_order - Order of a transparent huge page. - * @page: Head page of a transparent huge page. - */ -static inline unsigned int thp_order(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - if (PageHead(page)) - return HPAGE_PMD_ORDER; - return 0; -} - -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - if (PageHead(page)) - return HPAGE_PMD_NR; - return 1; -} - -/** * folio_test_pmd_mappable - Can we map this folio with a PMD? * @folio: The folio to test */ @@ -336,18 +311,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline unsigned int thp_order(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return 0; -} - -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return 1; -} - static inline bool folio_test_pmd_mappable(struct folio *folio) { return false; @@ -377,17 +340,12 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, static inline void prep_transhuge_page(struct page *page) {} -static inline bool is_transparent_hugepage(struct page *page) -{ - return false; -} - #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL static inline bool -can_split_huge_page(struct page *page, int *pextra_pins) +can_split_folio(struct folio *folio, int *pextra_pins) { BUILD_BUG(); return false; @@ -406,9 +364,9 @@ static inline void deferred_split_huge_page(struct page *page) {} do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct page *page) {} + unsigned long address, bool freeze, struct folio *folio) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address, bool freeze, struct page *page) {} + unsigned long address, bool freeze, struct folio *folio) {} #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) @@ -483,15 +441,10 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/** - * thp_size - Size of a transparent huge page. - * @page: Head page of a transparent huge page. - * - * Return: Number of bytes in this page. - */ -static inline unsigned long thp_size(struct page *page) +static inline int split_folio_to_list(struct folio *folio, + struct list_head *list) { - return PAGE_SIZE << thp_order(page); + return split_huge_page_to_list(&folio->page, list); } #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d1897a69c540..53c1b6082a4c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -754,7 +754,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) { - return entry; + return pte_mkhuge(entry); } #endif @@ -970,6 +970,11 @@ static inline struct hstate *page_hstate(struct page *page) return NULL; } +static inline struct hstate *size_to_hstate(unsigned long size) +{ + return NULL; +} + static inline unsigned long huge_page_size(struct hstate *h) { return PAGE_SIZE; @@ -1075,12 +1080,6 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr } #endif /* CONFIG_HUGETLB_PAGE */ -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP -extern bool hugetlb_free_vmemmap_enabled; -#else -#define hugetlb_free_vmemmap_enabled false -#endif - static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da21..aa1d4da03538 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index fad1f1df26df..eba380b76d15 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -332,12 +332,14 @@ enum hwmon_pwm_attributes { hwmon_pwm_enable, hwmon_pwm_mode, hwmon_pwm_freq, + hwmon_pwm_auto_channels_temp, }; #define HWMON_PWM_INPUT BIT(hwmon_pwm_input) #define HWMON_PWM_ENABLE BIT(hwmon_pwm_enable) #define HWMON_PWM_MODE BIT(hwmon_pwm_mode) #define HWMON_PWM_FREQ BIT(hwmon_pwm_freq) +#define HWMON_PWM_AUTO_CHANNELS_TEMP BIT(hwmon_pwm_auto_channels_temp) enum hwmon_intrusion_attributes { hwmon_intrusion_alarm, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f565a8938836..fe2e0179ed51 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1262,6 +1262,7 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; struct device_dma_parameters dma_parms; + u64 dma_mask; /* place holder to keep track of the dir for hv device in debugfs */ struct dentry *debug_dir; diff --git a/include/linux/i3c/ccc.h b/include/linux/i3c/ccc.h index 73b0982cc519..ad59a4ae60d1 100644 --- a/include/linux/i3c/ccc.h +++ b/include/linux/i3c/ccc.h @@ -132,7 +132,7 @@ struct i3c_ccc_dev_desc { struct i3c_ccc_defslvs { u8 count; struct i3c_ccc_dev_desc master; - struct i3c_ccc_dev_desc slaves[0]; + struct i3c_ccc_dev_desc slaves[]; } __packed; /** @@ -240,7 +240,7 @@ struct i3c_ccc_bridged_slave_desc { */ struct i3c_ccc_setbrgtgt { u8 count; - struct i3c_ccc_bridged_slave_desc bslaves[0]; + struct i3c_ccc_bridged_slave_desc bslaves[]; } __packed; /** @@ -318,7 +318,7 @@ enum i3c_ccc_setxtime_subcmd { */ struct i3c_ccc_setxtime { u8 subcmd; - u8 data[0]; + u8 data[]; } __packed; #define I3C_CCC_GETXTIME_SYNC_MODE BIT(0) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 559b6c644938..75d40acb60c1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2021 Intel Corporation + * Copyright (c) 2018 - 2022 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -18,6 +18,7 @@ #include <linux/types.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> +#include <linux/bitfield.h> #include <asm/byteorder.h> #include <asm/unaligned.h> @@ -1023,6 +1024,8 @@ struct ieee80211_tpc_report_ie { #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1) #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1 #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0) +#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5) +#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10 struct ieee80211_addba_ext_ie { u8 data; @@ -1697,10 +1700,12 @@ struct ieee80211_ht_operation { * A-MPDU buffer sizes * According to HT size varies from 8 to 64 frames * HE adds the ability to have up to 256 frames. + * EHT adds the ability to have up to 1K frames. */ #define IEEE80211_MIN_AMPDU_BUF 0x8 #define IEEE80211_MAX_AMPDU_BUF_HT 0x40 -#define IEEE80211_MAX_AMPDU_BUF 0x100 +#define IEEE80211_MAX_AMPDU_BUF_HE 0x100 +#define IEEE80211_MAX_AMPDU_BUF_EHT 0x400 /* Spatial Multiplexing Power Save Modes (for capability) */ @@ -1925,6 +1930,111 @@ struct ieee80211_mu_edca_param_set { struct ieee80211_he_mu_edca_param_ac_rec ac_vo; } __packed; +#define IEEE80211_EHT_MCS_NSS_RX 0x0f +#define IEEE80211_EHT_MCS_NSS_TX 0xf0 + +/** + * struct ieee80211_eht_mcs_nss_supp_20mhz_only - EHT 20MHz only station max + * supported NSS for per MCS. + * + * For each field below, bits 0 - 3 indicate the maximal number of spatial + * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams + * for Tx. + * + * @rx_tx_mcs7_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 0 - 7. + * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 8 - 9. + * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 10 - 11. + * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 12 - 13. + */ +struct ieee80211_eht_mcs_nss_supp_20mhz_only { + u8 rx_tx_mcs7_max_nss; + u8 rx_tx_mcs9_max_nss; + u8 rx_tx_mcs11_max_nss; + u8 rx_tx_mcs13_max_nss; +}; + +/** + * struct ieee80211_eht_mcs_nss_supp_bw - EHT max supported NSS per MCS (except + * 20MHz only stations). + * + * For each field below, bits 0 - 3 indicate the maximal number of spatial + * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams + * for Tx. + * + * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 0 - 9. + * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 10 - 11. + * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams + * supported for reception and the maximum number of spatial streams + * supported for transmission for MCS 12 - 13. + */ +struct ieee80211_eht_mcs_nss_supp_bw { + u8 rx_tx_mcs9_max_nss; + u8 rx_tx_mcs11_max_nss; + u8 rx_tx_mcs13_max_nss; +}; + +/** + * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data + * + * This structure is the "EHT Capabilities element" fixed fields as + * described in P802.11be_D1.4 section 9.4.2.313. + * + * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP* + * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP* + */ +struct ieee80211_eht_cap_elem_fixed { + u8 mac_cap_info[2]; + u8 phy_cap_info[9]; +} __packed; + +/** + * struct ieee80211_eht_cap_elem - EHT capabilities element + * @fixed: fixed parts, see &ieee80211_eht_cap_elem_fixed + * @optional: optional parts + */ +struct ieee80211_eht_cap_elem { + struct ieee80211_eht_cap_elem_fixed fixed; + + /* + * Followed by: + * Supported EHT-MCS And NSS Set field: 4, 3, 6 or 9 octets. + * EHT PPE Thresholds field: variable length. + */ + u8 optional[]; +} __packed; + +/** + * struct ieee80211_eht_operation - eht operation element + * + * This structure is the "EHT Operation Element" fields as + * described in P802.11be_D1.4 section 9.4.2.311 + * + * FIXME: The spec is unclear how big the fields are, and doesn't + * indicate the "Disabled Subchannel Bitmap Present" in the + * structure (Figure 9-1002a) at all ... + */ +struct ieee80211_eht_operation { + u8 chan_width; + u8 ccfs; + u8 present_bm; + + u8 disable_subchannel_bitmap[]; +} __packed; + +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x1 + /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 @@ -2129,6 +2239,8 @@ enum ieee80211_client_reg_power { #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G 0x10 +#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL 0x1e + #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G 0x20 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G 0x40 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK 0xfe @@ -2309,6 +2421,7 @@ ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK 0x78 #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS (3) #define IEEE80211_PPE_THRES_INFO_PPET_SIZE (3) +#define IEEE80211_HE_PPE_THRES_INFO_HEADER_SIZE (7) /* * Calculate 802.11ax HE capabilities IE PPE field size @@ -2338,6 +2451,29 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) return n; } +static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_he_cap_elem *he_cap_ie_elem = (const void *)data; + u8 needed = sizeof(*he_cap_ie_elem); + + if (len < needed) + return false; + + needed += ieee80211_he_mcs_nss_size(he_cap_ie_elem); + if (len < needed) + return false; + + if (he_cap_ie_elem->phy_cap_info[6] & + IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) { + if (len < needed + 1) + return false; + needed += ieee80211_he_ppe_size(data[needed], + he_cap_ie_elem->phy_cap_info); + } + + return len >= needed; +} + /* HE Operation defines */ #define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000007 #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000008 @@ -2427,7 +2563,7 @@ struct ieee80211_tx_pwr_env { static inline u8 ieee80211_he_oper_size(const u8 *he_oper_ie) { - struct ieee80211_he_operation *he_oper = (void *)he_oper_ie; + const struct ieee80211_he_operation *he_oper = (const void *)he_oper_ie; u8 oper_len = sizeof(struct ieee80211_he_operation); u32 he_oper_params; @@ -2460,7 +2596,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (void *)&he_oper->optional; + const u8 *ret = (const void *)&he_oper->optional; u32 he_oper_params; if (!he_oper) @@ -2475,7 +2611,7 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) ret++; - return (void *)ret; + return (const void *)ret; } /* HE Spatial Reuse defines */ @@ -2496,7 +2632,7 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) static inline u8 ieee80211_he_spr_size(const u8 *he_spr_ie) { - struct ieee80211_he_spr *he_spr = (void *)he_spr_ie; + const struct ieee80211_he_spr *he_spr = (const void *)he_spr_ie; u8 spr_len = sizeof(struct ieee80211_he_spr); u8 he_spr_params; @@ -2599,6 +2735,194 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) +/* EHT MAC capabilities as defined in P802.11be_D1.4 section 9.4.2.313.2 */ +#define IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS 0x01 +#define IEEE80211_EHT_MAC_CAP0_OM_CONTROL 0x02 +#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 0x04 +#define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 +#define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT 0x10 +#define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC 0x20 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_MASK 0xc0 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_3895 0 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_7991 1 +#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_11454 2 + +/* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ +#define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 +#define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ 0x04 +#define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI 0x08 +#define IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO 0x10 +#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER 0x20 +#define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE 0x40 + +/* EHT beamformee number of spatial streams <= 80MHz is split */ +#define IEEE80211_EHT_PHY_CAP0_BEAMFORMEE_SS_80MHZ_MASK 0x80 +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK 0x03 + +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK 0x1c +#define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK 0xe0 + +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_80MHZ_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK 0x38 + +/* EHT number of sounding dimensions for 320MHz is split */ +#define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK 0xc0 +#define IEEE80211_EHT_PHY_CAP3_SOUNDING_DIM_320MHZ_MASK 0x01 +#define IEEE80211_EHT_PHY_CAP3_NG_16_SU_FEEDBACK 0x02 +#define IEEE80211_EHT_PHY_CAP3_NG_16_MU_FEEDBACK 0x04 +#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK 0x08 +#define IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK 0x10 +#define IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK 0x20 +#define IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK 0x40 +#define IEEE80211_EHT_PHY_CAP3_TRIG_CQI_FDBK 0x80 + +#define IEEE80211_EHT_PHY_CAP4_PART_BW_DL_MU_MIMO 0x01 +#define IEEE80211_EHT_PHY_CAP4_PSR_SR_SUPP 0x02 +#define IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP 0x04 +#define IEEE80211_EHT_PHY_CAP4_EHT_MU_PPDU_4_EHT_LTF_08_GI 0x08 +#define IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK 0xf0 + +#define IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK 0x01 +#define IEEE80211_EHT_PHY_CAP5_TX_LESS_242_TONE_RU_SUPP 0x02 +#define IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP 0x04 +#define IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT 0x08 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK 0x30 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_0US 0 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_8US 1 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US 2 +#define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US 3 + +/* Maximum number of supported EHT LTF is split */ +#define IEEE80211_EHT_PHY_CAP5_MAX_NUM_SUPP_EHT_LTF_MASK 0xc0 +#define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 + +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 +#define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 + +#define IEEE80211_EHT_PHY_CAP7_20MHZ_STA_RX_NDP_WIDER_BW 0x01 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ 0x02 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ 0x04 +#define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ 0x10 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ 0x20 +#define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ 0x40 +#define IEEE80211_EHT_PHY_CAP7_TB_SOUNDING_FDBK_RATE_LIMIT 0x80 + +#define IEEE80211_EHT_PHY_CAP8_RX_1024QAM_WIDER_BW_DL_OFDMA 0x01 +#define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 + +/* + * EHT operation channel width as defined in P802.11be_D1.4 section 9.4.2.311 + */ +#define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ 1 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ 2 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ 3 +#define IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ 4 + +/* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */ +static inline u8 +ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, + const struct ieee80211_eht_cap_elem_fixed *eht_cap) +{ + u8 count = 0; + + /* on 2.4 GHz, if it supports 40 MHz, the result is 3 */ + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) + return 3; + + /* on 2.4 GHz, these three bits are reserved, so should be 0 */ + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) + count += 3; + + if (he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) + count += 3; + + if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) + count += 3; + + return count ? count : 4; +} + +/* 802.11be EHT PPE Thresholds */ +#define IEEE80211_EHT_PPE_THRES_NSS_POS 0 +#define IEEE80211_EHT_PPE_THRES_NSS_MASK 0xf +#define IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK 0x1f0 +#define IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE 3 +#define IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE 9 + +/* + * Calculate 802.11be EHT capabilities IE EHT field size + */ +static inline u8 +ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info) +{ + u32 n; + + if (!(phy_cap_info[5] & + IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT)) + return 0; + + n = hweight16(ppe_thres_hdr & + IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK); + n *= 1 + u16_get_bits(ppe_thres_hdr, IEEE80211_EHT_PPE_THRES_NSS_MASK); + + /* + * Each pair is 6 bits, and we need to add the 9 "header" bits to the + * total size. + */ + n = n * IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2 + + IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE; + return DIV_ROUND_UP(n, 8); +} + +static inline bool +ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len) +{ + const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data; + u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed); + + if (len < needed || !he_capa) + return false; + + needed += ieee80211_eht_mcs_nss_size((const void *)he_capa, + (const void *)data); + if (len < needed) + return false; + + if (elem->phy_cap_info[5] & + IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) { + u16 ppe_thres_hdr; + + if (len < needed + sizeof(ppe_thres_hdr)) + return false; + + ppe_thres_hdr = get_unaligned_le16(data + needed); + needed += ieee80211_eht_ppe_size(ppe_thres_hdr, + elem->phy_cap_info); + } + + return len >= needed; +} + +static inline bool +ieee80211_eht_oper_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_eht_operation *elem = (const void *)data; + u8 needed = sizeof(*elem); + + if (len < needed) + return false; + + if (elem->present_bm & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) + needed += 2; + + return len >= needed; +} #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3054,6 +3378,9 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_SHORT_SSID_LIST = 58, WLAN_EID_EXT_HE_6GHZ_CAPA = 59, WLAN_EID_EXT_UL_MU_POWER_CAPA = 60, + WLAN_EID_EXT_EHT_OPERATION = 106, + WLAN_EID_EXT_EHT_MULTI_LINK = 107, + WLAN_EID_EXT_EHT_CAPABILITY = 108, }; /* Action category code */ @@ -4005,10 +4332,10 @@ static inline bool for_each_element_completed(const struct element *element, #define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP 0x40 struct ieee80211_neighbor_ap_info { - u8 tbtt_info_hdr; - u8 tbtt_info_len; - u8 op_class; - u8 channel; + u8 tbtt_info_hdr; + u8 tbtt_info_len; + u8 op_class; + u8 channel; } __packed; enum ieee80211_range_params_max_total_ltf { diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 509e18c7e740..d62ef428e3aa 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -58,6 +58,7 @@ struct br_ip_list { #define BR_MRP_LOST_CONT BIT(18) #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) +#define BR_PORT_LOCKED BIT(21) #define BR_DEFAULT_AGEING_TIME (300 * HZ) @@ -118,6 +119,9 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +bool br_mst_enabled(const struct net_device *dev); +int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids); +int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -150,6 +154,22 @@ static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, { return -EINVAL; } + +static inline bool br_mst_enabled(const struct net_device *dev) +{ + return false; +} + +static inline int br_mst_get_info(const struct net_device *dev, u16 msti, + unsigned long *vids) +{ + return -EINVAL; +} +static inline int br_mst_get_state(const struct net_device *dev, u16 msti, + u8 *state) +{ + return -EINVAL; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 38bbc537d4e4..408539d5ea5f 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -9,6 +9,22 @@ enum hsr_version { PRP_V1, }; +/* HSR Tag. + * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, + * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, + * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, + * encapsulated protocol } instead. + * + * Field names as defined in the IEC:2010 standard for HSR. + */ +struct hsr_tag { + __be16 path_and_LSDU_size; + __be16 sequence_nr; + __be16 encap_proto; +} __packed; + +#define HSR_HLEN 6 + #if IS_ENABLED(CONFIG_HSR) extern bool is_hsr_master(struct net_device *dev); extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 10c94a3936ca..b42294739063 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -21,6 +21,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; + netdevice_tracker dev_tracker; void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8420fe504927..2be4dd7e90a9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -46,8 +46,10 @@ struct vlan_hdr { * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + struct_group(addrs, + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 674aeead6260..ead323243e7b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -150,6 +150,7 @@ struct in_ifaddr { __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; + unsigned char ifa_proto; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; diff --git a/include/linux/init.h b/include/linux/init.h index d82b4b2e1d25..baf0b29a7010 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -320,12 +320,19 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } +/* + * NOTE: __setup functions return values: + * @fn returns 1 (or non-zero) if the option argument is "handled" + * and returns 0 if the option argument is "not handled". + */ #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) /* - * NOTE: fn is as per module_param, not __setup! - * Emits warning if fn returns non-zero. + * NOTE: @fn is as per module_param, not __setup! + * I.e., @fn returns 0 for no error or non-zero for error + * (possibly @fn returns a -errno value, but it does not matter). + * Emits warning if @fn returns non-zero. */ #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 69230fd695ea..2f9891cb3d00 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -525,12 +525,6 @@ struct context_entry { */ #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(2) - struct dmar_domain { int nid; /* node id */ @@ -548,7 +542,6 @@ struct dmar_domain { u8 iommu_snooping: 1; /* indicate snooping control feature */ struct list_head devices; /* all devices' list */ - struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -563,11 +556,6 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ - u32 default_pasid; /* - * The default pasid used for non-SVM - * traffic on mediated devices. - */ - struct iommu_domain domain; /* generic domain data structure for iommu core */ }; @@ -590,7 +578,6 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain ***domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ @@ -620,21 +607,11 @@ struct intel_iommu { void *perf_statistic; }; -/* Per subdevice private data */ -struct subdev_domain_info { - struct list_head link_phys; /* link to phys device siblings */ - struct list_head link_domain; /* link to domain siblings */ - struct device *pdev; /* physical device derived from */ - struct dmar_domain *domain; /* aux-domain */ - int users; /* user count */ -}; - /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -645,7 +622,6 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; - u8 auxd_enabled:1; /* Multiple domains per device */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -717,7 +693,6 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); @@ -757,17 +732,12 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); -struct dmar_domain *find_domain(struct device *dev); -struct device_domain_info *get_domain_info(struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); #ifdef CONFIG_INTEL_IOMMU_SVM extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, - struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); @@ -795,7 +765,6 @@ struct intel_svm { unsigned int flags; u32 pasid; - int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; }; #else @@ -813,6 +782,8 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +extern const struct iommu_ops intel_iommu_ops; + #ifdef CONFIG_INTEL_IOMMU extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 1b73bab7eeff..b3b125b332aa 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -25,17 +25,5 @@ * do such IOTLB flushes automatically. */ #define SVM_FLAG_SUPERVISOR_MODE BIT(0) -/* - * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest - * processes. Compared to the host bind, the primary differences are: - * 1. mm life cycle management - * 2. fault reporting - */ -#define SVM_FLAG_GUEST_MODE BIT(1) -/* - * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, - * which requires guest and host PASID translation at both directions. - */ -#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c..f40754caaefa 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,16 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. + * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 649a4d7c241b..1814e698d861 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -9,11 +9,14 @@ struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); +void io_uring_unreg_ringfd(void); static inline void io_uring_files_cancel(void) { - if (current->io_uring) + if (current->io_uring) { + io_uring_unreg_ringfd(); __io_uring_cancel(false); + } } static inline void io_uring_task_cancel(void) { diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,13 +34,16 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) @@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b55bd49e55f5..b76f0dd149fb 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -227,12 +227,9 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); -int iomap_is_partially_uptodate(struct page *page, unsigned long from, - unsigned long count); +bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); -void iomap_invalidatepage(struct page *page, unsigned int offset, - unsigned int len); #ifdef CONFIG_MIGRATION int iomap_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); @@ -263,9 +260,11 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ + u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ + sector_t io_sector; /* start sector of ioend */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! */ }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index de0c57a567c8..9208eca4b0d1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -37,6 +37,7 @@ struct iommu_group; struct bus_type; struct device; struct iommu_domain; +struct iommu_domain_ops; struct notifier_block; struct iommu_sva; struct iommu_fault_event; @@ -88,7 +89,7 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; - const struct iommu_ops *ops; + const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ iommu_fault_handler_t handler; void *handler_token; @@ -144,7 +145,6 @@ struct iommu_resv_region { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally * enabling %IOMMU_DEV_FEAT_SVA requires @@ -157,7 +157,6 @@ struct iommu_resv_region { * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). */ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; @@ -194,48 +193,28 @@ struct iommu_iotlb_gather { * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @domain_alloc: allocate iommu domain - * @domain_free: free iommu domain - * @attach_dev: attach device to an iommu domain - * @detach_dev: detach device from an iommu domain - * @map: map a physically contiguous memory region to an iommu domain - * @map_pages: map a physically contiguous set of pages of the same size to - * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain - * @unmap_pages: unmap a number of pages of the same size from an iommu domain - * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain - * @iotlb_sync_map: Sync mappings created recently using @map to the hardware - * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush - * queue - * @iova_to_phys: translate iova to physical address * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain * @device_group: find iommu group for a particular device - * @enable_nesting: Enable nesting - * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device - * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) * @dev_has/enable/disable_feat: per device entries to check/enable/disable * iommu specific features. * @dev_feat_enabled: check enabled feature - * @aux_attach/detach_dev: aux-domain specific attach/detach entries. - * @aux_get_pasid: get the pasid given an aux-domain * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response - * @cache_invalidate: invalidate translation caches - * @sva_bind_gpasid: bind guest pasid and mm - * @sva_unbind_gpasid: unbind guest pasid and mm * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting + * @default_domain_ops: the default ops for domains * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -244,43 +223,18 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - void (*domain_free)(struct iommu_domain *); - int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); - int (*map_pages)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t pgsize, size_t pgcount, - int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); - size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, - size_t pgsize, size_t pgcount, - struct iommu_iotlb_gather *iotlb_gather); - void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); - void (*iotlb_sync)(struct iommu_domain *domain, - struct iommu_iotlb_gather *iotlb_gather); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); - int (*enable_nesting)(struct iommu_domain *domain); - int (*set_pgtable_quirks)(struct iommu_domain *domain, - unsigned long quirks); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); void (*put_resv_regions)(struct device *dev, struct list_head *list); - void (*apply_resv_region)(struct device *dev, - struct iommu_domain *domain, - struct iommu_resv_region *region); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); - bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); + bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); @@ -288,11 +242,6 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - /* Aux-domain specific attach/detach entries */ - int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, void *drvdata); void (*sva_unbind)(struct iommu_sva *handle); @@ -301,20 +250,64 @@ struct iommu_ops { int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); - int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info); - int (*sva_bind_gpasid)(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); - - int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); int (*def_domain_type)(struct device *dev); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; }; /** + * struct iommu_domain_ops - domain specific operations + * @attach_dev: attach an iommu domain to a device + * @detach_dev: detach an iommu domain from a device + * @map: map a physically contiguous memory region to an iommu domain + * @map_pages: map a physically contiguous set of pages of the same size to + * an iommu domain. + * @unmap: unmap a physically contiguous memory region from an iommu domain + * @unmap_pages: unmap a number of pages of the same size from an iommu domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain + * @iotlb_sync_map: Sync mappings created recently using @map to the hardware + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * queue + * @iova_to_phys: translate iova to physical address + * @enable_nesting: Enable nesting + * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) + * @free: Release the domain after use. + */ +struct iommu_domain_ops { + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); + size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather); + + void (*flush_iotlb_all)(struct iommu_domain *domain); + void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); + void (*iotlb_sync)(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather); + + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + dma_addr_t iova); + + int (*enable_nesting)(struct iommu_domain *domain); + int (*set_pgtable_quirks)(struct iommu_domain *domain, + unsigned long quirks); + + void (*free)(struct iommu_domain *domain); +}; + +/** * struct iommu_device - IOMMU core representation of one IOMMU hardware * instance * @list: Used by the iommu-core to keep a list of registered iommus @@ -403,6 +396,17 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } +static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) +{ + /* + * Assume that valid ops must be installed if iommu_probe_device() + * has succeeded. The device ops are essentially for internal use + * within the IOMMU subsystem itself, so we should be able to trust + * ourselves not to misuse the helper. + */ + return dev->iommu->iommu_dev->ops; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -421,14 +425,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - void __user *uinfo); - -extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); -extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata); extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); @@ -672,9 +668,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); -int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev); -void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev); -int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -1019,23 +1012,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline int -iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - -static inline void -iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev) -{ -} - -static inline int -iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) -{ - return -ENODEV; -} - static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) { @@ -1051,33 +1027,6 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int -iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, void __user *udata) -{ - return -ENODEV; -} - -static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - ioasid_t pasid) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h new file mode 100644 index 000000000000..e69a002d5aa4 --- /dev/null +++ b/include/linux/iosys-map.h @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer abstraction for IO/system memory + */ + +#ifndef __IOSYS_MAP_H__ +#define __IOSYS_MAP_H__ + +#include <linux/io.h> +#include <linux/string.h> + +/** + * DOC: overview + * + * When accessing a memory region, depending on its location, users may have to + * access it with I/O operations or memory load/store operations. For example, + * copying to system memory could be done with memcpy(), copying to I/O memory + * would be done with memcpy_toio(). + * + * .. code-block:: c + * + * void *vaddr = ...; // pointer to system memory + * memcpy(vaddr, src, len); + * + * void *vaddr_iomem = ...; // pointer to I/O memory + * memcpy_toio(vaddr, _iomem, src, len); + * + * The user of such pointer may not have information about the mapping of that + * region or may want to have a single code path to handle operations on that + * buffer, regardless if it's located in system or IO memory. The type + * :c:type:`struct iosys_map <iosys_map>` and its helpers abstract that so the + * buffer can be passed around to other drivers or have separate duties inside + * the same driver for allocation, read and write operations. + * + * Open-coding access to :c:type:`struct iosys_map <iosys_map>` is considered + * bad style. Rather then accessing its fields directly, use one of the provided + * helper functions, or implement your own. For example, instances of + * :c:type:`struct iosys_map <iosys_map>` can be initialized statically with + * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These + * helpers will set an address in system memory. + * + * .. code-block:: c + * + * struct iosys_map map = IOSYS_MAP_INIT_VADDR(0xdeadbeaf); + * + * iosys_map_set_vaddr(&map, 0xdeadbeaf); + * + * To set an address in I/O memory, use iosys_map_set_vaddr_iomem(). + * + * .. code-block:: c + * + * iosys_map_set_vaddr_iomem(&map, 0xdeadbeaf); + * + * Instances of struct iosys_map do not have to be cleaned up, but + * can be cleared to NULL with iosys_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * iosys_map_clear(&map); + * + * Test if a mapping is valid with either iosys_map_is_set() or + * iosys_map_is_null(). + * + * .. code-block:: c + * + * if (iosys_map_is_set(&map) != iosys_map_is_null(&map)) + * // always true + * + * Instances of :c:type:`struct iosys_map <iosys_map>` can be compared for + * equality with iosys_map_is_equal(). Mappings that point to different memory + * spaces, system or I/O, are never equal. That's even true if both spaces are + * located in the same address space, both mappings contain the same address + * value, or both mappings refer to NULL. + * + * .. code-block:: c + * + * struct iosys_map sys_map; // refers to system memory + * struct iosys_map io_map; // refers to I/O memory + * + * if (iosys_map_is_equal(&sys_map, &io_map)) + * // always false + * + * A set up instance of struct iosys_map can be used to access or manipulate the + * buffer memory. Depending on the location of the memory, the provided helpers + * will pick the correct operations. Data can be copied into the memory with + * iosys_map_memcpy_to(). The address can be manipulated with iosys_map_incr(). + * + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * iosys_map_memcpy_to(&map, src, len); + * iosys_map_incr(&map, len); // go to first byte after the memcpy + */ + +/** + * struct iosys_map - Pointer to IO/system memory + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the buffer is located in I/O memory, or false + * otherwise. + */ +struct iosys_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/** + * IOSYS_MAP_INIT_VADDR - Initializes struct iosys_map to an address in system memory + * @vaddr_: A system-memory address + */ +#define IOSYS_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + +/** + * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map + * @map_: The dma-buf mapping structure to copy from + * @offset_: Offset to add to the other mapping + * + * Initializes a new iosys_map struct based on another passed as argument. It + * does a shallow copy of the struct so it's possible to update the back storage + * without changing where the original map points to. It is the equivalent of + * doing: + * + * .. code-block:: c + * + * iosys_map map = other_map; + * iosys_map_incr(&map, &offset); + * + * Example usage: + * + * .. code-block:: c + * + * void foo(struct device *dev, struct iosys_map *base_map) + * { + * ... + * struct iosys_map map = IOSYS_MAP_INIT_OFFSET(base_map, FIELD_OFFSET); + * ... + * } + * + * The advantage of using the initializer over just increasing the offset with + * iosys_map_incr() like above is that the new map will always point to the + * right place of the buffer during its scope. It reduces the risk of updating + * the wrong part of the buffer and having no compiler warning about that. If + * the assignment to IOSYS_MAP_INIT_OFFSET() is forgotten, the compiler can warn + * about the use of uninitialized variable. + */ +#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \ + struct iosys_map copy = *map_; \ + iosys_map_incr(©, offset_); \ + copy; \ +}) + +/** + * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory + * @map: The iosys_map structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr(struct iosys_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + +/** + * iosys_map_set_vaddr_iomem - Sets a iosys mapping structure to an address in I/O memory + * @map: The iosys_map structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr_iomem(struct iosys_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + +/** + * iosys_map_is_equal - Compares two iosys mapping structures for equality + * @lhs: The iosys_map structure + * @rhs: A iosys_map structure to compare with + * + * Two iosys mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True is both structures are equal, or false otherwise. + */ +static inline bool iosys_map_is_equal(const struct iosys_map *lhs, + const struct iosys_map *rhs) +{ + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; +} + +/** + * iosys_map_is_null - Tests for a iosys mapping to be NULL + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool iosys_map_is_null(const struct iosys_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * iosys_map_is_set - Tests if the iosys mapping has been set + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping is been set, or false otherwise. + */ +static inline bool iosys_map_is_set(const struct iosys_map *map) +{ + return !iosys_map_is_null(map); +} + +/** + * iosys_map_clear - Clears a iosys mapping structure + * @map: The iosys_map structure + * + * Clears all fields to zero, including struct iosys_map.is_iomem, so + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void iosys_map_clear(struct iosys_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +/** + * iosys_map_memcpy_to - Memcpy into offset of iosys_map + * @dst: The iosys_map structure + * @dst_offset: The offset from which to copy + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a iosys_map with an offset. The source buffer is in + * system memory. Depending on the buffer's location, the helper picks the + * correct method of accessing the memory. + */ +static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, + const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem + dst_offset, src, len); + else + memcpy(dst->vaddr + dst_offset, src, len); +} + +/** + * iosys_map_memcpy_from - Memcpy from iosys_map into system memory + * @dst: Destination in system memory + * @src: The iosys_map structure + * @src_offset: The offset from which to copy + * @len: The number of byte in src + * + * Copies data from a iosys_map with an offset. The dest buffer is in + * system memory. Depending on the mapping location, the helper picks the + * correct method of accessing the memory. + */ +static inline void iosys_map_memcpy_from(void *dst, const struct iosys_map *src, + size_t src_offset, size_t len) +{ + if (src->is_iomem) + memcpy_fromio(dst, src->vaddr_iomem + src_offset, len); + else + memcpy(dst, src->vaddr + src_offset, len); +} + +/** + * iosys_map_incr - Increments the address stored in a iosys mapping + * @map: The iosys_map structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a iosys mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void iosys_map_incr(struct iosys_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + +/** + * iosys_map_memset - Memset iosys_map + * @dst: The iosys_map structure + * @offset: Offset from dst where to start setting value + * @value: The value to set + * @len: The number of bytes to set in dst + * + * Set value in iosys_map. Depending on the buffer's location, the helper + * picks the correct method of accessing the memory. + */ +static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + if (dst->is_iomem) + memset_io(dst->vaddr_iomem + offset, value, len); + else + memset(dst->vaddr + offset, value, len); +} + +/** + * iosys_map_rd - Read a C-type value from the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from which to read + * @type__: Type of the value being read + * + * Read a C type value from iosys_map, handling possible un-aligned accesses to + * the mapping. + * + * Returns: + * The value read from the mapping. + */ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val; \ + iosys_map_memcpy_from(&val, map__, offset__, sizeof(val)); \ + val; \ +}) + +/** + * iosys_map_wr - Write a C-type value to the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from the mapping to write to + * @type__: Type of the value being written + * @val__: Value to write + * + * Write a C-type value to the iosys_map, handling possible un-aligned accesses + * to the mapping. + */ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val = (val__); \ + iosys_map_memcpy_to(map__, offset__, &val, sizeof(val)); \ +}) + +/** + * iosys_map_rd_field - Read a member from a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * + * Read a value from iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and its + * value read handling possible un-aligned memory accesses. For example: suppose + * there is a @struct foo defined as below and the value ``foo.field2.inner2`` + * needs to be read from the iosys_map: + * + * .. code-block:: c + * + * struct foo { + * int field1; + * struct { + * int inner1; + * int inner2; + * } field2; + * int field3; + * } __packed; + * + * This is the expected memory layout of a buffer using iosys_map_rd_field(): + * + * +------------------------------+--------------------------+ + * | Address | Content | + * +==============================+==========================+ + * | buffer + 0000 | start of mmapped buffer | + * | | pointed by iosys_map | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + ``struct_offset__`` | start of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + wwww | ``foo.field2.inner2`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + yyyy | end of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + zzzz | end of mmaped buffer | + * +------------------------------+--------------------------+ + * + * Values automatically calculated by this macro or not needed are denoted by + * wwww, yyyy and zzzz. This is the code to read that value: + * + * .. code-block:: c + * + * x = iosys_map_rd_field(&map, offset, struct foo, field2.inner2); + * + * Returns: + * The value read from the mapping. + */ +#define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \ + struct_type__ *s; \ + iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__)); \ +}) + +/** + * iosys_map_wr_field - Write to a member of a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * @val__: Value to write + * + * Write a value to the iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and the + * @val__ is written handling possible un-aligned memory accesses. Refer to + * iosys_map_rd_field() for expected usage and memory layout. + */ +#define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct_type__ *s; \ + iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__), val__); \ +}) + +#endif /* __IOSYS_MAP_H__ */ diff --git a/include/linux/iova.h b/include/linux/iova.h index cea79cb9f26c..320a70e40233 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -21,18 +21,8 @@ struct iova { unsigned long pfn_lo; /* Lowest allocated pfn */ }; -struct iova_magazine; -struct iova_cpu_rcache; -#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ - -struct iova_rcache { - spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; - struct iova_cpu_rcache __percpu *cpu_rcaches; -}; +struct iova_rcache; /* holds all the iova translations for a domain */ struct iova_domain { @@ -46,7 +36,7 @@ struct iova_domain { unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + struct iova_rcache *rcaches; struct hlist_node cpuhp_dead; }; @@ -102,6 +92,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +int iova_domain_init_rcaches(struct iova_domain *iovad); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a59d25f19385..16870f86c74d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; @@ -371,19 +371,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } -static inline struct inet6_request_sock * - inet6_rsk(const struct request_sock *rsk) -{ - return NULL; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL -#define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */ diff --git a/include/linux/irq.h b/include/linux/irq.h index 848e1e12c5c6..f92788ccdba2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) /** * struct irq_chip - hardware interrupt chip descriptor * - * @parent_device: pointer to parent device for irqchip * @name: name for /proc/interrupts * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) @@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @flags: chip specific flags */ struct irq_chip { - struct device *parent_device; const char *name; unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); @@ -712,10 +710,11 @@ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name); -static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, +static inline void irq_set_chip_and_handler(unsigned int irq, + const struct irq_chip *chip, irq_flow_handler_t handle) { irq_set_chip_and_handler_name(irq, chip, handle, NULL); @@ -805,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) } /* Set/get chip/data for an IRQ: */ -extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); +extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip); extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h deleted file mode 100644 index a978fc8c7996..000000000000 --- a/include/linux/irqchip/versatile-fpga.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLAT_FPGA_IRQ_H -#define PLAT_FPGA_IRQ_H - -struct device_node; -struct pt_regs; - -void fpga_handle_irq(struct pt_regs *regs); -void fpga_irq_init(void __iomem *, const char *, int, int, u32, - struct device_node *node); -int fpga_irq_of_init(struct device_node *node, - struct device_node *parent); - -#endif diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c56..a77584593f7d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d476405802e9..00d577f90883 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -151,6 +151,8 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to a device that the domain represent, and that will be + * used for power management purposes. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * * Revmap data, used internally by irq_domain @@ -171,6 +173,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif @@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } +static inline void irq_domain_set_pm_device(struct irq_domain *d, + struct device *dev) +{ + if (d) + d->dev = dev; +} + #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); @@ -469,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); @@ -512,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, - struct irq_chip *chip, + const struct irq_chip *chip, void *chip_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c45281a..de9536680b2b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -554,9 +554,6 @@ struct transaction_chp_stats_s { * ->j_list_lock * * j_state_lock - * ->t_handle_lock - * - * j_state_lock * ->j_list_lock (journal_unmap_buffer) * */ @@ -594,7 +591,7 @@ struct transaction_s */ unsigned long t_log_start; - /* + /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ @@ -1295,7 +1292,7 @@ struct journal_s * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ - void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: @@ -1419,9 +1416,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); @@ -1486,9 +1481,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr); -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); extern int __init jbd2_journal_init_transaction_cache(void); @@ -1535,14 +1527,16 @@ void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); -extern int jbd2_journal_invalidatepage(journal_t *, - struct page *, unsigned int, unsigned int); +int jbd2_journal_invalidate_folio(journal_t *, struct folio *, + size_t offset, size_t length); extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *); +void jbd2_journal_wait_updates(journal_t *); + extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int bsize); @@ -1774,8 +1768,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -extern int jbd_blocks_per_page(struct inode *inode); - /* JBD uses a CRC32 checksum */ #define JBD_MAX_CHECKSUM_SIZE 4 diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 48b9b2a82767..107751cc047b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -82,10 +82,9 @@ extern bool static_key_initialized; "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) -#ifdef CONFIG_JUMP_LABEL - struct static_key { atomic_t enabled; +#ifdef CONFIG_JUMP_LABEL /* * Note: * To make anonymous unions work with old compilers, the static @@ -104,13 +103,9 @@ struct static_key { struct jump_entry *entries; struct static_key_mod *next; }; +#endif /* CONFIG_JUMP_LABEL */ }; -#else -struct static_key { - atomic_t enabled; -}; -#endif /* CONFIG_JUMP_LABEL */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL @@ -251,10 +246,10 @@ extern void static_key_disable_cpuslocked(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - { .entries = (void *)JUMP_TYPE_TRUE } } + { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - { .entries = (void *)JUMP_TYPE_FALSE } } + { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 000000000000..6f612d69ea0c --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#include <linux/static_key.h> + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4a45562d8893..ceebcb9de7bf 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include <linux/bug.h> +#include <linux/kasan-enabled.h> #include <linux/kernel.h> #include <linux/static_key.h> #include <linux/types.h> @@ -18,13 +19,15 @@ struct task_struct; #include <linux/linkage.h> #include <asm/kasan.h> -/* kasan_data struct is used in KUnit tests for KASAN expected failures */ -struct kunit_kasan_expectation { - bool report_found; -}; - #endif +typedef unsigned int __bitwise kasan_vmalloc_flags_t; + +#define KASAN_VMALLOC_NONE 0x00u +#define KASAN_VMALLOC_INIT 0x01u +#define KASAN_VMALLOC_VM_ALLOC 0x02u +#define KASAN_VMALLOC_PROT_NORMAL 0x04u + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include <linux/pgtable.h> @@ -83,47 +86,8 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); -void kasan_free_pages(struct page *page, unsigned int order); - #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - -static __always_inline void kasan_alloc_pages(struct page *page, - unsigned int order, gfp_t flags) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - -static __always_inline void kasan_free_pages(struct page *page, - unsigned int order) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) @@ -303,10 +267,6 @@ static __always_inline bool kasan_check_byte(const void *addr) return true; } - -bool kasan_save_enable_multi_shot(void); -void kasan_restore_multi_shot(bool enabled); - #else /* CONFIG_KASAN */ static inline slab_flags_t kasan_never_merge(void) @@ -435,34 +395,71 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + +void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ } +static inline int kasan_populate_vmalloc(unsigned long start, + unsigned long size) +{ + return 0; +} +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) { } + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags); +static __always_inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size, + kasan_vmalloc_flags_t flags) +{ + if (kasan_enabled()) + return __kasan_unpoison_vmalloc(start, size, flags); + return (void *)start; +} + +void __kasan_poison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_poison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_poison_vmalloc(start, size); +} #else /* CONFIG_KASAN_VMALLOC */ +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } - -static inline void kasan_poison_vmalloc(const void *start, unsigned long size) -{ } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) {} + unsigned long free_region_end) { } -static inline void kasan_populate_early_vm_area_shadow(void *start, - unsigned long size) +static inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size, + kasan_vmalloc_flags_t flags) +{ + return (void *)start; +} +static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } #endif /* CONFIG_KASAN_VMALLOC */ @@ -471,17 +468,17 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, !defined(CONFIG_KASAN_VMALLOC) /* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. + * These functions allocate and free shadow memory for kernel modules. + * They are only required when KASAN_VMALLOC is not supported, as otherwise + * shadow memory is allocated by the generic vmalloc handlers. */ -int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); -void kasan_free_shadow(const struct vm_struct *vm); +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask); +void kasan_free_module_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; } +static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 33f47a996513..a890428bcc1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -99,7 +99,7 @@ struct user; extern int __cond_resched(void); # define might_resched() __cond_resched() -#elif defined(CONFIG_PREEMPT_DYNAMIC) +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) extern int __cond_resched(void); @@ -110,6 +110,11 @@ static __always_inline void might_resched(void) static_call_mod(might_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) + +extern int dynamic_might_resched(void); +# define might_resched() dynamic_might_resched() + #else # define might_resched() do { } while (0) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0c994ae37729..58d1b58a971e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -20,6 +20,12 @@ #include <uapi/linux/kexec.h> +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; +extern note_buf_t __percpu *crash_notes; + #ifdef CONFIG_KEXEC_CORE #include <linux/list.h> #include <linux/compat.h> @@ -350,12 +356,6 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; -extern note_buf_t __percpu *crash_notes; - /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 4b5e3679a72c..f49e64222628 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -17,6 +17,8 @@ #include <linux/atomic.h> #include <linux/static_key.h> +extern unsigned long kfence_sample_interval; + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an diff --git a/include/linux/kobject_api.h b/include/linux/kobject_api.h new file mode 100644 index 000000000000..6e36a054c2d6 --- /dev/null +++ b/include/linux/kobject_api.h @@ -0,0 +1 @@ +#include <linux/kobject.h> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 19b884353b15..5f1859836deb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -427,6 +427,9 @@ static inline struct kprobe *kprobe_running(void) { return NULL; } +#define kprobe_busy_begin() do {} while (0) +#define kprobe_busy_end() do {} while (0) + static inline int register_kprobe(struct kprobe *p) { return -EOPNOTSUPP; diff --git a/include/linux/kref_api.h b/include/linux/kref_api.h new file mode 100644 index 000000000000..d67e554721d2 --- /dev/null +++ b/include/linux/kref_api.h @@ -0,0 +1 @@ +#include <linux/kref.h> diff --git a/include/linux/ksm.h b/include/linux/ksm.h index a38a5bca1ba5..0630e545f4cb 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -51,7 +51,7 @@ static inline void ksm_exit(struct mm_struct *mm) struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); -void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_ksm(struct folio *folio, const struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -78,8 +78,8 @@ static inline struct page *ksm_might_need_to_copy(struct page *page, return page; } -static inline void rmap_walk_ksm(struct page *page, - struct rmap_walk_control *rwc) +static inline void rmap_walk_ksm(struct folio *folio, + const struct rmap_walk_control *rwc) { } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 3df4ea04716f..de5d75bafd66 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -141,12 +141,6 @@ struct kthread_delayed_work { struct timer_list timer; }; -#define KTHREAD_WORKER_INIT(worker) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock), \ - .work_list = LIST_HEAD_INIT((worker).work_list), \ - .delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\ - } - #define KTHREAD_WORK_INIT(work, fn) { \ .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ @@ -158,9 +152,6 @@ struct kthread_delayed_work { TIMER_IRQSAFE), \ } -#define DEFINE_KTHREAD_WORKER(worker) \ - struct kthread_worker worker = KTHREAD_WORKER_INIT(worker) - #define DEFINE_KTHREAD_WORK(work, fn) \ struct kthread_work work = KTHREAD_WORK_INIT(work, fn) @@ -168,19 +159,6 @@ struct kthread_delayed_work { struct kthread_delayed_work dwork = \ KTHREAD_DELAYED_WORK_INIT(dwork, fn) -/* - * kthread_worker.lock needs its own lockdep class key when defined on - * stack with lockdep enabled. Use the following macros in such cases. - */ -#ifdef CONFIG_LOCKDEP -# define KTHREAD_WORKER_INIT_ONSTACK(worker) \ - ({ kthread_init_worker(&worker); worker; }) -# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ - struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) -#else -# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) -#endif - extern void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key); diff --git a/include/linux/ktime_api.h b/include/linux/ktime_api.h new file mode 100644 index 000000000000..f697d493960f --- /dev/null +++ b/include/linux/ktime_api.h @@ -0,0 +1 @@ +#include <linux/ktime.h> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 06912d6b39d0..9536ffa0473b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,7 +29,9 @@ #include <linux/refcount.h> #include <linux/nospec.h> #include <linux/notifier.h> +#include <linux/ftrace.h> #include <linux/hashtable.h> +#include <linux/instrumentation.h> #include <linux/interval_tree.h> #include <linux/rbtree.h> #include <linux/xarray.h> @@ -151,10 +153,9 @@ static inline bool is_error_page(struct page *page) * Bits 4-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 -#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 @@ -368,8 +369,11 @@ struct kvm_vcpu { u64 last_used_slot_gen; }; -/* must be called with irqs disabled */ -static __always_inline void guest_enter_irqoff(void) +/* + * Start accounting time towards a guest. + * Must be called before entering guest context. + */ +static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so its safe to assume that it's the @@ -378,7 +382,18 @@ static __always_inline void guest_enter_irqoff(void) instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); +} +/* + * Enter guest context and enter an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_enter_irqoff(void) +{ /* * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -394,16 +409,79 @@ static __always_inline void guest_enter_irqoff(void) } } -static __always_inline void guest_exit_irqoff(void) +/* + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and + * guest_state_enter_irqoff(). + */ +static __always_inline void guest_enter_irqoff(void) +{ + guest_timing_enter_irqoff(); + guest_context_enter_irqoff(); +} + +/** + * guest_state_enter_irqoff - Fixup state when entering a guest + * + * Entry to a guest will enable interrupts, but the kernel state is interrupts + * disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code before entering a guest. + * Must be called with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_enter_irqoff() before this. + * + * Note: this is analogous to exit_to_user_mode(). + */ +static __always_inline void guest_state_enter_irqoff(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + + guest_context_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +/* + * Exit guest context and exit an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_exit_irqoff(void) { context_tracking_guest_exit(); +} +/* + * Stop accounting time towards a guest. + * Must be called after exiting guest context. + */ +static __always_inline void guest_timing_exit_irqoff(void) +{ instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } +/* + * Deprecated. Architectures should move to guest_state_exit_irqoff() and + * guest_timing_exit_irqoff(). + */ +static __always_inline void guest_exit_irqoff(void) +{ + guest_context_exit_irqoff(); + guest_timing_exit_irqoff(); +} + static inline void guest_exit(void) { unsigned long flags; @@ -413,6 +491,33 @@ static inline void guest_exit(void) local_irq_restore(flags); } +/** + * guest_state_exit_irqoff - Establish state when returning from guest mode + * + * Entry from a guest disables interrupts, but guest mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific code after exiting a guest. + * Must be invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_exit_irqoff() after this. + * + * Note: this is analogous to enter_from_user_mode(). + */ +static __always_inline void guest_state_exit_irqoff(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + guest_context_exit_irqoff(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* @@ -1219,7 +1324,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_flush_remote_tlbs(struct kvm *kvm); -void kvm_reload_remote_mmus(struct kvm *kvm); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); diff --git a/include/linux/libata.h b/include/linux/libata.h index 605756f645be..9b1d3d8b1252 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -380,6 +380,7 @@ enum { ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -518,7 +519,10 @@ struct ata_taskfile { u8 hob_lbam; u8 hob_lbah; - u8 feature; + union { + u8 error; + u8 feature; + }; u8 nsect; u8 lbal; u8 lbam; @@ -526,7 +530,10 @@ struct ata_taskfile { u8 device; - u8 command; /* IO operation */ + union { + u8 status; + u8 command; + }; u32 auxiliary; /* auxiliary field */ /* from SATA 3.1 and */ @@ -1080,7 +1087,7 @@ extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM -extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); +extern void ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); extern void ata_sas_port_suspend(struct ata_port *ap); extern void ata_sas_port_resume(struct ata_port *ap); diff --git a/include/linux/linkage.h b/include/linux/linkage.h index dbf8506decca..acb1ad2356f1 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -165,7 +165,18 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ - .size name, .-name + .set .L__sym_size_##name, .-name ASM_NL \ + .size name, .L__sym_size_##name +#endif + +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* === code annotations === */ @@ -200,30 +211,8 @@ SYM_ENTRY(name, linkage, SYM_A_NONE) #endif -/* - * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one - * function - */ -#ifndef SYM_FUNC_START_LOCAL_ALIAS -#define SYM_FUNC_START_LOCAL_ALIAS(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) -#endif - -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) -#endif - /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. - */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif @@ -236,7 +225,6 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif @@ -259,22 +247,39 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + /* SYM_CODE_START -- use for non-C (special) functions */ #ifndef SYM_CODE_START #define SYM_CODE_START(name) \ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index f8397f300fcd..15e0e0209da4 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -66,11 +66,6 @@ static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr, linkmode_clear_bit(nr, addr); } -static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) -{ - __change_bit(nr, addr); -} - static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) { return test_bit(nr, addr); diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 1b5fceb565df..b35968ee9fb5 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/nodemask.h> #include <linux/shrinker.h> +#include <linux/xarray.h> struct mem_cgroup; @@ -33,8 +34,8 @@ struct list_lru_one { struct list_lru_memcg { struct rcu_head rcu; - /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_one *lru[]; + /* array of per cgroup per node lists, indexed by node id */ + struct list_lru_one node[]; }; struct list_lru_node { @@ -42,11 +43,7 @@ struct list_lru_node { spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; -#ifdef CONFIG_MEMCG_KMEM - /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ - struct list_lru_memcg __rcu *memcg_lrus; -#endif - long nr_items; + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { @@ -55,6 +52,7 @@ struct list_lru { struct list_head list; int shrinker_id; bool memcg_aware; + struct xarray xa; #endif }; @@ -69,8 +67,9 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) -int memcg_update_all_list_lrus(int num_memcgs); -void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg); +int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, + gfp_t gfp); +void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent); /** * list_lru_add: add an element to the lru list's tail diff --git a/include/linux/llist_api.h b/include/linux/llist_api.h new file mode 100644 index 000000000000..625bec0393a1 --- /dev/null +++ b/include/linux/llist_api.h @@ -0,0 +1 @@ +#include <linux/llist.h> diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 975e33b793a7..6d635e8306d6 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) -static inline void local_lock_acquire(local_lock_t *l) { } -static inline void local_lock_release(local_lock_t *l) { } -static inline void local_lock_debug_init(local_lock_t *l) { } +# define local_lock_acquire(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_release(__ll) do { typecheck(local_lock_t *, __ll); } while (0) +# define local_lock_debug_init(__ll) do { typecheck(local_lock_t *, __ll); } while (0) #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } diff --git a/include/linux/lockdep_api.h b/include/linux/lockdep_api.h new file mode 100644 index 000000000000..907e66979ab2 --- /dev/null +++ b/include/linux/lockdep_api.h @@ -0,0 +1 @@ +#include <linux/lockdep.h> diff --git a/include/linux/log2.h b/include/linux/log2.h index df0b155c2141..9f30d087a128 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -18,7 +18,7 @@ * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; @@ -26,7 +26,7 @@ int __ilog2_u32(u32 n) #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 -static inline __attribute__((const)) +static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a5a724c308d8..db924fe379c9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -80,7 +80,7 @@ LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, unsigned long *set_kern_flags) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) -LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, +LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, @@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3bf5c658bc44..419b5febc3ca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1046,6 +1046,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. + * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b72d75141e12..a68dce3873fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,6 +34,7 @@ enum memcg_stat_item { MEMCG_SOCK, MEMCG_PERCPU_B, MEMCG_VMALLOC, + MEMCG_KMEM, MEMCG_NR_STAT, }; @@ -219,7 +220,7 @@ struct obj_cgroup { struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { - struct list_head list; + struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; @@ -315,7 +316,8 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; struct obj_cgroup __rcu *objcg; - struct list_head objcg_list; /* list of inherited objcgs */ + /* list of inherited objcgs, protected by objcg_lock */ + struct list_head objcg_list; #endif MEMCG_PADDING(_pad2_); @@ -522,6 +524,20 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); +retry: + memcg = obj_cgroup_memcg(objcg); + if (unlikely(!css_tryget(&memcg->css))) + goto retry; + rcu_read_unlock(); + + return memcg; +} + #ifdef CONFIG_MEMCG_KMEM /* * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. @@ -840,9 +856,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) */ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { - if (!memcg->memory.parent) - return NULL; - return mem_cgroup_from_counter(memcg->memory.parent, memory); + return mem_cgroup_from_css(memcg->css.parent); } static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, @@ -1671,18 +1685,6 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); extern struct static_key_false memcg_kmem_enabled_key; -extern int memcg_nr_cache_ids; -void memcg_get_cache_ids(void); -void memcg_put_cache_ids(void); - -/* - * Helper macro to loop through all memcg-specific caches. Callers must still - * check if the cache is valid (it is either valid or NULL). - * the slab_mutex must be held when looping through those caches - */ -#define for_each_memcg_cache_index(_idx) \ - for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++) - static inline bool memcg_kmem_enabled(void) { return static_branch_likely(&memcg_kmem_enabled_key); @@ -1706,7 +1708,7 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. */ -static inline int memcg_cache_id(struct mem_cgroup *memcg) +static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return memcg ? memcg->kmemcg_id : -1; } @@ -1739,27 +1741,16 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -#define for_each_memcg_cache_index(_idx) \ - for (; NULL; ) - static inline bool memcg_kmem_enabled(void) { return false; } -static inline int memcg_cache_id(struct mem_cgroup *memcg) +static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return -1; } -static inline void memcg_get_cache_ids(void) -{ -} - -static inline void memcg_put_cache_ids(void) -{ -} - static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) { return NULL; diff --git a/include/linux/memory.h b/include/linux/memory.h index 88eb587b5143..aa619464a1df 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -70,6 +70,13 @@ struct memory_block { unsigned long state; /* serialized by the dev->lock */ int online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ + /* + * The single zone of this memory block if all PFNs of this memory block + * that are System RAM (not a memory hole, not ZONE_DEVICE ranges) are + * managed by a single zone. NULL if multiple zones (including nodes) + * apply. + */ + struct zone *zone; struct device dev; /* * Number of vmemmap pages. These pages @@ -161,6 +168,11 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, }) #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) + +#ifdef CONFIG_NUMA +void memory_block_add_nid(struct memory_block *mem, int nid, + enum meminit_context context); +#endif /* CONFIG_NUMA */ #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index be48e003a518..1ce6f8044f1e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,6 +16,62 @@ struct memory_group; struct resource; struct vmem_altmap; +#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION +/* + * For supporting node-hotadd, we have to allocate a new pgdat. + * + * If an arch has generic style NODE_DATA(), + * node_data[nid] = kzalloc() works well. But it depends on the architecture. + * + * In general, generic_alloc_nodedata() is used. + * + */ +extern pg_data_t *arch_alloc_nodedata(int nid); +extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); + +#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + +#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) + +#ifdef CONFIG_NUMA +/* + * XXX: node aware allocation can't work well to get new node's memory at this time. + * Because, pgdat for the new node is not allocated/initialized yet itself. + * To use new node's memory, more consideration will be necessary. + */ +#define generic_alloc_nodedata(nid) \ +({ \ + memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ +}) +/* + * This definition is just for error path in node hotadd. + * For node hotremove, we have to replace this. + */ +#define generic_free_nodedata(pgdat) kfree(pgdat) + +extern pg_data_t *node_data[]; +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + node_data[nid] = pgdat; +} + +#else /* !CONFIG_NUMA */ + +/* never called */ +static inline pg_data_t *generic_alloc_nodedata(int nid) +{ + BUG(); + return NULL; +} +static inline void generic_free_nodedata(pg_data_t *pgdat) +{ +} +static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ + #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); @@ -107,8 +163,6 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); -extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, - unsigned long end_pfn); extern void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn); @@ -154,66 +208,6 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ -#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION -/* - * For supporting node-hotadd, we have to allocate a new pgdat. - * - * If an arch has generic style NODE_DATA(), - * node_data[nid] = kzalloc() works well. But it depends on the architecture. - * - * In general, generic_alloc_nodedata() is used. - * Now, arch_free_nodedata() is just defined for error path of node_hot_add. - * - */ -extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_free_nodedata(pg_data_t *pgdat); -extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); - -#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - -#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) -#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat) - -#ifdef CONFIG_NUMA -/* - * If ARCH_HAS_NODEDATA_EXTENSION=n, this func is used to allocate pgdat. - * XXX: kmalloc_node() can't work well to get new node's memory at this time. - * Because, pgdat for the new node is not allocated/initialized yet itself. - * To use new node's memory, more consideration will be necessary. - */ -#define generic_alloc_nodedata(nid) \ -({ \ - kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ -}) -/* - * This definition is just for error path in node hotadd. - * For node hotremove, we have to replace this. - */ -#define generic_free_nodedata(pgdat) kfree(pgdat) - -extern pg_data_t *node_data[]; -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ - node_data[nid] = pgdat; -} - -#else /* !CONFIG_NUMA */ - -/* never called */ -static inline pg_data_t *generic_alloc_nodedata(int nid) -{ - BUG(); - return NULL; -} -static inline void generic_free_nodedata(pg_data_t *pgdat) -{ -} -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - void get_online_mems(void); void put_online_mems(void); @@ -297,7 +291,7 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages, - struct memory_group *group); + struct zone *zone, struct memory_group *group); extern int remove_memory(u64 start, u64 size); extern void __remove_memory(u64 start, u64 size); extern int offline_and_remove_memory(u64 start, u64 size); @@ -306,7 +300,7 @@ extern int offline_and_remove_memory(u64 start, u64 size); static inline void try_offline_node(int nid) {} static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages, - struct memory_group *group) + struct zone *zone, struct memory_group *group) { return -EINVAL; } @@ -323,7 +317,7 @@ extern void set_zone_contiguous(struct zone *zone); extern void clear_zone_contiguous(struct zone *zone); #ifdef CONFIG_MEMORY_HOTPLUG -extern void __ref free_area_init_core_hotplug(int nid); +extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory_resource(int nid, struct resource *resource, diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 1fafcc38acba..8af304f6b504 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ + +#include <linux/mm.h> #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> @@ -66,9 +68,9 @@ enum memory_type { struct dev_pagemap_ops { /* - * Called once the page refcount reaches 1. (ZONE_DEVICE pages never - * reach 0 refcount unless there is a refcount bug. This allows the - * device driver to implement its own memory management.) + * Called once the page refcount reaches 0. The reference count will be + * reset to one by the core code after the method is called to prepare + * for handing out the page again. */ void (*page_free)(struct page *page); @@ -129,6 +131,25 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) return 1 << pgmap->vmemmap_shift; } +static inline bool is_device_private_page(const struct page *page) +{ + return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} + +static inline bool folio_is_device_private(const struct folio *folio) +{ + return is_device_private_page(&folio->page); +} + +static inline bool is_pci_p2pdma_page(const struct page *page) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && + is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); diff --git a/include/linux/mfd/idt82p33_reg.h b/include/linux/mfd/idt82p33_reg.h index 129a6c078221..1db532feeb91 100644 --- a/include/linux/mfd/idt82p33_reg.h +++ b/include/linux/mfd/idt82p33_reg.h @@ -7,6 +7,8 @@ #ifndef HAVE_IDT82P33_REG #define HAVE_IDT82P33_REG +#define REG_ADDR(page, offset) (((page) << 0x7) | ((offset) & 0x7f)) + /* Register address */ #define DPLL1_TOD_CNFG 0x134 #define DPLL2_TOD_CNFG 0x1B4 @@ -41,6 +43,7 @@ #define REG_SOFT_RESET 0X381 #define OUT_MUX_CNFG(outn) REG_ADDR(0x6, (0xC * (outn))) +#define TOD_TRIGGER(wr_trig, rd_trig) ((wr_trig & 0xf) << 4 | (rd_trig & 0xf)) /* Register bit definitions */ #define SYNC_TOD BIT(1) diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 39967a5eca6d..ea4a4b1b246a 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -8,7 +8,7 @@ #ifndef LPC_ICH_H #define LPC_ICH_H -#include <linux/platform_data/x86/intel-spi.h> +#include <linux/platform_data/x86/spi-intel.h> /* GPIO resources */ #define ICH_RES_GPIO 0 diff --git a/include/linux/migrate.h b/include/linux/migrate.h index db96e10eb8da..90e75d5a54d6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); extern bool numa_demotion_enabled; +extern void migrate_on_reclaim_init(void); +#ifdef CONFIG_HOTPLUG_CPU +extern void set_migration_target_nodes(void); #else +static inline void set_migration_target_nodes(void) {} +#endif +#else + +static inline void set_migration_target_nodes(void) {} static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, diff --git a/include/linux/mii.h b/include/linux/mii.h index 12ea29e04293..5ee13083cec7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -355,56 +355,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) } /** - * mii_lpa_mod_linkmode_adv_sgmii - * @lp_advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_LPA bits to - * linkmode advertisement settings for SGMII. - * Leaves other bits unchanged. - */ -static inline void -mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa) -{ - u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK; - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_1000FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_100FULL); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10HALF); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising, - speed_duplex == LPA_SGMII_10FULL); -} - -/** - * mii_lpa_to_linkmode_adv_sgmii - * @advertising: pointer to destination link mode. - * @lpa: value of the MII_LPA register - * - * A small helper function that translates MII_ADVERTISE bits - * to linkmode advertisement settings when in SGMII mode. - * Clears the old value of advertising. - */ -static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising, - u32 lpa) -{ - linkmode_zero(lp_advertising); - - mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa); -} - -/** * mii_adv_mod_linkmode_adv_t * @advertising:pointer to destination link mode. * @adv: value of the MII_ADVERTISE register diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7bfb67363434..cb15308b5cb0 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,8 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 78655d8d13a7..9424503eb8d3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -264,6 +264,14 @@ enum { struct mlx5_cmd_stats { u64 sum; u64 n; + /* number of times command failed */ + u64 failed; + /* number of times command failed on bad status returned by FW */ + u64 failed_mbox_status; + /* last command failed returned errno */ + u32 last_failed_errno; + /* last bad status returned by FW */ + u8 last_failed_mbox_status; struct dentry *root; /* protect command average calculations */ spinlock_t lock; @@ -543,6 +551,15 @@ struct mlx5_adev { int idx; }; +struct mlx5_debugfs_entries { + struct dentry *dbg_root; + struct dentry *qp_debugfs; + struct dentry *eq_debugfs; + struct dentry *cq_debugfs; + struct dentry *cmdif_debugfs; + struct dentry *pages_debugfs; +}; + struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ @@ -553,21 +570,19 @@ struct mlx5_priv { struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct xarray page_root_xa; - int fw_pages; + u32 fw_pages; atomic_t reg_pages; struct list_head free_list; - int vfs_pages; - int host_pf_pages; + u32 vfs_pages; + u32 host_pf_pages; + u32 fw_pages_alloc_failed; + u32 give_pages_dropped; + u32 reclaim_pages_discard; struct mlx5_core_health health; struct list_head traps; - /* start: qp staff */ - struct dentry *qp_debugfs; - struct dentry *eq_debugfs; - struct dentry *cq_debugfs; - struct dentry *cmdif_debugfs; - /* end: qp staff */ + struct mlx5_debugfs_entries dbg; /* start: alloc staff */ /* protect buffer allocation according to numa node */ @@ -577,7 +592,6 @@ struct mlx5_priv { struct mutex pgdir_mutex; struct list_head pgdir_list; /* end: alloc staff */ - struct dentry *dbg_root; struct list_head ctx_list; spinlock_t ctx_lock; @@ -863,20 +877,10 @@ struct mlx5_hca_vport_context { bool grh_required; }; -static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset) -{ - return buf->frags->buf + offset; -} - #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) -{ - return pci_get_drvdata(pdev); -} - extern struct dentry *mlx5_debugfs_root; static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) @@ -965,6 +969,8 @@ typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; + u16 opcode; /* cmd opcode */ + void *out; /* pointer to the cmd output buffer */ }; void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, @@ -973,7 +979,9 @@ void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work); - +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out); +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); @@ -991,7 +999,6 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); @@ -1002,9 +1009,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, - int size, struct mlx5_frag_buf *buf); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); @@ -1023,6 +1027,8 @@ int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); @@ -1030,15 +1036,18 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); @@ -1143,6 +1152,9 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id); +struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev); +void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev); + #ifdef CONFIG_MLX5_CORE_IPOIB struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b1aad14689e3..e3bfed68b08a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -224,6 +224,7 @@ struct mlx5_flow_act { u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; + struct mlx5_flow_group *fg; }; #define MLX5_DECLARE_FLOW_ACT(name) \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 598ac3bcc901..7d2d0ba82144 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -64,13 +64,6 @@ enum { }; enum { - MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, - MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, - MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, - MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 -}; - -enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, @@ -127,6 +120,11 @@ enum { MLX5_CMD_OP_QUERY_SF_PARTITION = 0x111, MLX5_CMD_OP_ALLOC_SF = 0x113, MLX5_CMD_OP_DEALLOC_SF = 0x114, + MLX5_CMD_OP_SUSPEND_VHCA = 0x115, + MLX5_CMD_OP_RESUME_VHCA = 0x116, + MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE = 0x117, + MLX5_CMD_OP_SAVE_VHCA_STATE = 0x118, + MLX5_CMD_OP_LOAD_VHCA_STATE = 0x119, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -500,7 +498,10 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_at_c0[0x18]; + u8 reserved_at_c0[0x10]; + u8 ipv4_ihl[0x4]; + u8 reserved_at_c4[0x4]; + u8 ttl_hoplimit[0x8]; u8 udp_sport[0x10]; @@ -1350,6 +1351,7 @@ enum mlx5_fc_bulk_alloc_bitmask { enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, + MLX5_STEERING_FORMAT_CONNECTX_7 = 2, }; struct mlx5_ifc_cmd_hca_cap_bits { @@ -1426,8 +1428,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_130[0xa]; u8 log_max_ra_res_dc[0x6]; - u8 reserved_at_140[0x6]; + u8 reserved_at_140[0x5]; u8 release_all_pages[0x1]; + u8 must_not_use[0x1]; u8 reserved_at_147[0x2]; u8 roce_accl[0x1]; u8 log_max_ra_req_qp[0x6]; @@ -1757,7 +1760,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_682[0x1]; u8 log_max_sf[0x5]; u8 apu[0x1]; - u8 reserved_at_689[0x7]; + u8 reserved_at_689[0x4]; + u8 migration[0x1]; + u8 reserved_at_68e[0x2]; u8 log_min_sf_size[0x8]; u8 max_num_sf_partitions[0x8]; @@ -3434,7 +3439,6 @@ enum { enum { MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), - MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2), }; enum { @@ -9694,7 +9698,10 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x6b]; + u8 reserved_at_0[0x5d]; + u8 mcia_32dwords[0x1]; + u8 reserved_at_5e[0xc]; + u8 reset_state[0x1]; u8 ptpcyc2realtime_modify[0x1]; u8 reserved_at_6c[0x2]; u8 pci_status_and_power[0x1]; @@ -9888,10 +9895,10 @@ struct mlx5_ifc_pcmr_reg_bits { }; struct mlx5_ifc_lane_2_module_mapping_bits { - u8 reserved_at_0[0x6]; - u8 rx_lane[0x2]; - u8 reserved_at_8[0x6]; - u8 tx_lane[0x2]; + u8 reserved_at_0[0x4]; + u8 rx_lane[0x4]; + u8 reserved_at_8[0x4]; + u8 tx_lane[0x4]; u8 reserved_at_10[0x8]; u8 module[0x8]; }; @@ -9900,8 +9907,8 @@ struct mlx5_ifc_bufferx_reg_bits { u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_at_8[0xc]; - u8 size[0xc]; + u8 reserved_at_8[0x8]; + u8 size[0x10]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; @@ -10376,6 +10383,14 @@ struct mlx5_ifc_mcda_reg_bits { }; enum { + MLX5_MFRL_REG_RESET_STATE_IDLE = 0, + MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, + MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, + MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3, + MLX5_MFRL_REG_RESET_STATE_NACK = 4, +}; + +enum { MLX5_MFRL_REG_RESET_TYPE_FULL_CHIP = BIT(0), MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE = BIT(1), }; @@ -10393,7 +10408,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x8]; + u8 reserved_at_28[0x4]; + u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; }; @@ -11519,4 +11535,142 @@ enum { MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE, }; +enum { + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR = 0x0, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER = 0x1, +}; + +struct mlx5_ifc_suspend_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_suspend_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum { + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER = 0x0, + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR = 0x1, +}; + +struct mlx5_ifc_resume_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_resume_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_vhca_migration_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_query_vhca_migration_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 required_umem_size[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_save_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; + + u8 va[0x40]; + + u8 mkey[0x20]; + + u8 size[0x20]; +}; + +struct mlx5_ifc_save_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 actual_image_size[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_load_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; + + u8 va[0x40]; + + u8 mkey[0x20]; + + u8 size[0x20]; +}; + +struct mlx5_ifc_load_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 77ea4f9c5265..28a928b0684b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -56,8 +56,6 @@ enum mlx5_an_status { MLX5_AN_LINK_DOWN = 4, }; -#define MLX5_EEPROM_MAX_BYTES 32 -#define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 61e48d459b23..8bda3ba5b109 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -202,6 +202,9 @@ struct mlx5_wqe_fmr_seg { struct mlx5_wqe_ctrl_seg { __be32 opmod_idx_opcode; __be32 qpn_ds; + + struct_group(trailer, + u8 signature; u8 rsvd[2]; u8 fm_ce_se; @@ -211,6 +214,8 @@ struct mlx5_wqe_ctrl_seg { __be32 umr_mkey; __be32 tis_tir_num; }; + + ); /* end of trailer group */ }; #define MLX5_WQE_CTRL_DS_MASK 0x3f diff --git a/include/linux/mm.h b/include/linux/mm.h index e1a84b1e6787..e34edb775334 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3,9 +3,6 @@ #define _LINUX_MM_H #include <linux/errno.h> - -#ifdef __KERNEL__ - #include <linux/mmdebug.h> #include <linux/gfp.h> #include <linux/bug.h> @@ -26,7 +23,6 @@ #include <linux/err.h> #include <linux/page-flags.h> #include <linux/page_ref.h> -#include <linux/memremap.h> #include <linux/overflow.h> #include <linux/sizes.h> #include <linux/sched.h> @@ -216,8 +212,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) +#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) +#define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ @@ -227,6 +225,10 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +static inline struct folio *lru_to_folio(struct list_head *head) +{ + return list_entry((head)->prev, struct folio, lru); +} void setup_initial_init_mm(void *start_code, void *end_code, void *end_data, void *brk); @@ -478,7 +480,8 @@ struct vm_fault { struct vm_area_struct *vma; /* Target VMA */ gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ - unsigned long address; /* Faulting virtual address */ + unsigned long address; /* Faulting virtual address - masked */ + unsigned long real_address; /* Faulting virtual address - unmasked */ }; enum fault_flag flags; /* FAULT_FLAG_xxx flags * XXX: should really be 'const' */ @@ -774,21 +777,26 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -static inline int head_compound_mapcount(struct page *head) +/* + * How many times the entire folio is mapped as a single unit (eg by a + * PMD or PUD entry). This is probably not what you want, except for + * debugging purposes; look at folio_mapcount() or page_mapcount() + * instead. + */ +static inline int folio_entire_mapcount(struct folio *folio) { - return atomic_read(compound_mapcount_ptr(head)) + 1; + VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + return atomic_read(folio_mapcount_ptr(folio)) + 1; } /* * Mapcount of compound page as a whole, does not include mapped sub-pages. * - * Must be called only for compound pages or any their tail sub-pages. + * Must be called only for compound pages. */ static inline int compound_mapcount(struct page *page) { - VM_BUG_ON_PAGE(!PageCompound(page), page); - page = compound_head(page); - return head_compound_mapcount(page); + return folio_entire_mapcount(page_folio(page)); } /* @@ -818,15 +826,16 @@ static inline int page_mapcount(struct page *page) return atomic_read(&page->_mapcount) + 1; } +int folio_mapcount(struct folio *folio); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE -int total_mapcount(struct page *page); -int page_trans_huge_mapcount(struct page *page); -#else static inline int total_mapcount(struct page *page) { - return page_mapcount(page); + return folio_mapcount(page_folio(page)); } -static inline int page_trans_huge_mapcount(struct page *page) + +#else +static inline int total_mapcount(struct page *page) { return page_mapcount(page); } @@ -889,33 +898,17 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline bool hpage_pincount_available(struct page *page) -{ - /* - * Can the page->hpage_pinned_refcount field be used? That field is in - * the 3rd page of the compound page, so the smallest (2-page) compound - * pages cannot support it. - */ - page = compound_head(page); - return PageCompound(page) && compound_order(page) > 1; -} - static inline int head_compound_pincount(struct page *head) { return atomic_read(compound_pincount_ptr(head)); } -static inline int compound_pincount(struct page *page) -{ - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); - page = compound_head(page); - return head_compound_pincount(page); -} - static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; +#ifdef CONFIG_64BIT page[1].compound_nr = 1U << order; +#endif } /* Returns the number of pages in this potentially compound page. */ @@ -923,7 +916,11 @@ static inline unsigned long compound_nr(struct page *page) { if (!PageHead(page)) return 1; +#ifdef CONFIG_64BIT return page[1].compound_nr; +#else + return 1UL << compound_order(page); +#endif } /* Returns the number of bytes in this potentially compound page. */ @@ -938,6 +935,37 @@ static inline unsigned int page_shift(struct page *page) return PAGE_SHIFT + compound_order(page); } +/** + * thp_order - Order of a transparent huge page. + * @page: Head page of a transparent huge page. + */ +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return compound_order(page); +} + +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return compound_nr(page); +} + +/** + * thp_size - Size of a transparent huge page. + * @page: Head page of a transparent huge page. + * + * Return: Number of bytes in this page. + */ +static inline unsigned long thp_size(struct page *page) +{ + return PAGE_SIZE << thp_order(page); +} + void free_compound_page(struct page *page); #ifdef CONFIG_MMU @@ -1089,59 +1117,35 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +static inline bool folio_is_zone_device(const struct folio *folio) +{ + return is_zone_device_page(&folio->page); +} + static inline bool is_zone_movable_page(const struct page *page) { return page_zonenum(page) == ZONE_MOVABLE; } -#ifdef CONFIG_DEV_PAGEMAP_OPS -void free_devmap_managed_page(struct page *page); +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -static inline bool page_is_devmap_managed(struct page *page) +bool __put_devmap_managed_page(struct page *page); +static inline bool put_devmap_managed_page(struct page *page) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; - switch (page->pgmap->type) { - case MEMORY_DEVICE_PRIVATE: - case MEMORY_DEVICE_FS_DAX: - return true; - default: - break; - } - return false; + return __put_devmap_managed_page(page); } -void put_devmap_managed_page(struct page *page); - -#else /* CONFIG_DEV_PAGEMAP_OPS */ -static inline bool page_is_devmap_managed(struct page *page) +#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ +static inline bool put_devmap_managed_page(struct page *page) { return false; } - -static inline void put_devmap_managed_page(struct page *page) -{ -} -#endif /* CONFIG_DEV_PAGEMAP_OPS */ - -static inline bool is_device_private_page(const struct page *page) -{ - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_DEVICE_PRIVATE) && - is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; -} - -static inline bool is_pci_p2pdma_page(const struct page *page) -{ - return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && - IS_ENABLED(CONFIG_PCI_P2PDMA) && - is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; -} +#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ @@ -1167,9 +1171,6 @@ static inline void get_page(struct page *page) } bool __must_check try_grab_page(struct page *page, unsigned int flags); -struct page *try_grab_compound_head(struct page *page, int refs, - unsigned int flags); - static inline __must_check bool try_get_page(struct page *page) { @@ -1224,16 +1225,11 @@ static inline void put_page(struct page *page) struct folio *folio = page_folio(page); /* - * For devmap managed pages we need to catch refcount transition from - * 2 to 1, when refcount reach one it means the page is free and we - * need to inform the device driver through callback. See - * include/linux/memremap.h and HMM for details. + * For some devmap managed pages we need to catch refcount transition + * from 2 to 1: */ - if (page_is_devmap_managed(&folio->page)) { - put_devmap_managed_page(&folio->page); + if (put_devmap_managed_page(&folio->page)) return; - } - folio_put(folio); } @@ -1263,10 +1259,9 @@ static inline void put_page(struct page *page) * applications that don't have huge page reference counts, this won't be an * issue. * - * Locking: the lockless algorithm described in page_cache_get_speculative() - * and page_cache_gup_pin_speculative() provides safe operation for - * get_user_pages and page_mkclean and other calls that race to set up page - * table entries. + * Locking: the lockless algorithm described in folio_try_get_rcu() + * provides safe operation for get_user_pages(), page_mkclean() and + * other calls that race to set up page table entries. */ #define GUP_PIN_COUNTING_BIAS (1U << 10) @@ -1277,70 +1272,11 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); -/** - * page_maybe_dma_pinned - Report if a page is pinned for DMA. - * @page: The page. - * - * This function checks if a page has been pinned via a call to - * a function in the pin_user_pages() family. - * - * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, - * because it means "definitely not pinned for DMA", but true means "probably - * pinned for DMA, but possibly a false positive due to having at least - * GUP_PIN_COUNTING_BIAS worth of normal page references". - * - * False positives are OK, because: a) it's unlikely for a page to get that many - * refcounts, and b) all the callers of this routine are expected to be able to - * deal gracefully with a false positive. - * - * For huge pages, the result will be exactly correct. That's because we have - * more tracking data available: the 3rd struct page in the compound page is - * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS - * scheme). - * - * For more information, please see Documentation/core-api/pin_user_pages.rst. - * - * Return: True, if it is likely that the page has been "dma-pinned". - * False, if the page is definitely not dma-pinned. - */ -static inline bool page_maybe_dma_pinned(struct page *page) -{ - if (hpage_pincount_available(page)) - return compound_pincount(page) > 0; - - /* - * page_ref_count() is signed. If that refcount overflows, then - * page_ref_count() returns a negative value, and callers will avoid - * further incrementing the refcount. - * - * Here, for that overflow case, use the signed bit to count a little - * bit higher via unsigned math, and thus still get an accurate result. - */ - return ((unsigned int)page_ref_count(compound_head(page))) >= - GUP_PIN_COUNTING_BIAS; -} - static inline bool is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } -/* - * This should most likely only be called during fork() to see whether we - * should break the cow immediately for a page on the src mm. - */ -static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, - struct page *page) -{ - if (!is_cow_mapping(vma->vm_flags)) - return false; - - if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) - return false; - - return page_maybe_dma_pinned(page); -} - #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif @@ -1506,11 +1442,18 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { - if (kasan_enabled()) { - tag ^= 0xff; - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } + unsigned long old_flags, flags; + + if (!kasan_enabled()) + return; + + tag ^= 0xff; + old_flags = READ_ONCE(page->flags); + do { + flags = old_flags; + flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) @@ -1578,6 +1521,74 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline atomic_t *folio_pincount_ptr(struct folio *folio) +{ + return &folio_page(folio, 1)->compound_pincount; +} + +/** + * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. + * @folio: The folio. + * + * This function checks if a folio has been pinned via a call to + * a function in the pin_user_pages() family. + * + * For small folios, the return value is partially fuzzy: false is not fuzzy, + * because it means "definitely not pinned for DMA", but true means "probably + * pinned for DMA, but possibly a false positive due to having at least + * GUP_PIN_COUNTING_BIAS worth of normal folio references". + * + * False positives are OK, because: a) it's unlikely for a folio to + * get that many refcounts, and b) all the callers of this routine are + * expected to be able to deal gracefully with a false positive. + * + * For large folios, the result will be exactly correct. That's because + * we have more tracking data available: the compound_pincount is used + * instead of the GUP_PIN_COUNTING_BIAS scheme. + * + * For more information, please see Documentation/core-api/pin_user_pages.rst. + * + * Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. + */ +static inline bool folio_maybe_dma_pinned(struct folio *folio) +{ + if (folio_test_large(folio)) + return atomic_read(folio_pincount_ptr(folio)) > 0; + + /* + * folio_ref_count() is signed. If that refcount overflows, then + * folio_ref_count() returns a negative value, and callers will avoid + * further incrementing the refcount. + * + * Here, for that overflow case, use the sign bit to count a little + * bit higher via unsigned math, and thus still get an accurate result. + */ + return ((unsigned int)folio_ref_count(folio)) >= + GUP_PIN_COUNTING_BIAS; +} + +static inline bool page_maybe_dma_pinned(struct page *page) +{ + return folio_maybe_dma_pinned(page_folio(page)); +} + +/* + * This should most likely only be called during fork() to see whether we + * should break the cow immediately for a page on the src mm. + */ +static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, + struct page *page) +{ + if (!is_cow_mapping(vma->vm_flags)) + return false; + + if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) + return false; + + return page_maybe_dma_pinned(page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) @@ -1592,6 +1603,11 @@ static inline bool is_pinnable_page(struct page *page) } #endif +static inline bool folio_is_pinnable(struct folio *folio) +{ + return is_pinnable_page(&folio->page); +} + static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); @@ -1741,7 +1757,6 @@ static inline void *folio_address(const struct folio *folio) } extern void *page_rmapping(struct page *page); -extern struct anon_vma *page_anon_vma(struct page *page); extern pgoff_t __page_file_index(struct page *page); /* @@ -1847,7 +1862,6 @@ extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int generic_error_remove_page(struct address_space *mapping, struct page *page); -int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, @@ -1909,10 +1923,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, int *locked); -long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, - unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, @@ -1932,9 +1942,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); -extern void do_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); - bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); @@ -2446,7 +2453,6 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) } extern void __init pagecache_init(void); -extern void __init free_area_init_memoryless_node(int nid); extern void free_initmem(void); /* @@ -2619,7 +2625,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx, const char *); + struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); @@ -2918,13 +2924,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ -#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ @@ -3144,10 +3148,12 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, gfp_t gfp_mask); +#endif void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, @@ -3237,6 +3243,7 @@ enum mf_action_page_type { MF_MSG_BUDDY, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, + MF_MSG_DIFFERENT_PAGE_SIZE, MF_MSG_UNKNOWN, }; @@ -3365,14 +3372,14 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name); + unsigned long len_in, + struct anon_vma_name *anon_name); #else static inline int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name) { + unsigned long len_in, struct anon_vma_name *anon_name) { return 0; } #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_api.h b/include/linux/mm_api.h new file mode 100644 index 000000000000..a5ace2b198b8 --- /dev/null +++ b/include/linux/mm_api.h @@ -0,0 +1 @@ +#include <linux/mm.h> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index b725839dfe71..ac32125745ab 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -99,7 +99,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); - list_add(&folio->lru, &lruvec->lists[lru]); + if (lru != LRU_UNEVICTABLE) + list_add(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list(struct page *page, @@ -115,6 +116,7 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); + /* This is not expected to be used on LRU_UNEVICTABLE */ list_add_tail(&folio->lru, &lruvec->lists[lru]); } @@ -127,8 +129,11 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { - list_del(&folio->lru); - update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + enum lru_list lru = folio_lru_list(folio); + + if (lru != LRU_UNEVICTABLE) + list_del(&folio->lru); + update_lru_size(lruvec, lru, folio_zonenum(folio), -folio_nr_pages(folio)); } @@ -140,50 +145,91 @@ static __always_inline void del_page_from_lru_list(struct page *page, #ifdef CONFIG_ANON_VMA_NAME /* - * mmap_lock should be read-locked when calling vma_anon_name() and while using - * the returned pointer. + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. */ -extern const char *vma_anon_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name_alloc(const char *name); +extern void anon_vma_name_free(struct kref *kref); -/* - * mmap_lock should be read-locked for orig_vma->vm_mm. - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be - * isolated. - */ -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma); +/* mmap_lock should be read-locked */ +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_get(&anon_name->kref); +} -/* - * mmap_lock should be write-locked or vma should have been isolated under - * write-locked mmap_lock protection. - */ -extern void free_vma_anon_name(struct vm_area_struct *vma); +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_put(&anon_name->kref, anon_vma_name_free); +} -/* mmap_lock should be read-locked */ -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) +static inline +struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name) +{ + /* Prevent anon_name refcount saturation early on */ + if (kref_read(&anon_name->kref) < REFCOUNT_MAX) { + anon_vma_name_get(anon_name); + return anon_name; + + } + return anon_vma_name_alloc(anon_name->name); +} + +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) { - const char *vma_name = vma_anon_name(vma); + struct anon_vma_name *anon_name = anon_vma_name(orig_vma); - /* either both NULL, or pointers to same string */ - if (vma_name == name) + if (anon_name) + new_vma->anon_name = anon_vma_name_reuse(anon_name); +} + +static inline void free_anon_vma_name(struct vm_area_struct *vma) +{ + /* + * Not using anon_vma_name because it generates a warning if mmap_lock + * is not held, which might be the case here. + */ + if (!vma->vm_file) + anon_vma_name_put(vma->anon_name); +} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + if (anon_name1 == anon_name2) return true; - return name && vma_name && !strcmp(name, vma_name); + return anon_name1 && anon_name2 && + !strcmp(anon_name1->name, anon_name2->name); } + #else /* CONFIG_ANON_VMA_NAME */ -static inline const char *vma_anon_name(struct vm_area_struct *vma) +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) { return NULL; } -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) {} -static inline void free_vma_anon_name(struct vm_area_struct *vma) {} -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) + +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_anon_vma_name(struct vm_area_struct *vma) {} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) { return true; } + #endif /* CONFIG_ANON_VMA_NAME */ static inline void init_tlb_flush_pending(struct mm_struct *mm) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9db36dc5d4cf..8834e38c06a4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -85,7 +85,16 @@ struct page { * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ - struct list_head lru; + union { + struct list_head lru; + /* Or, for the Unevictable "LRU list" slot */ + struct { + /* Always even, to negate PageTail */ + void *__filler; + /* Count page's or folio's mlocks */ + unsigned int mlock_count; + }; + }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; pgoff_t index; /* Our offset within mapping. */ @@ -126,11 +135,14 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + atomic_t compound_pincount; +#ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ +#endif }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ - atomic_t hpage_pinned_refcount; + unsigned long _compound_pad_2; /* For both global and memcg */ struct list_head deferred_list; }; @@ -241,7 +253,13 @@ struct folio { struct { /* public: */ unsigned long flags; - struct list_head lru; + union { + struct list_head lru; + struct { + void *__filler; + unsigned int mlock_count; + }; + }; struct address_space *mapping; pgoff_t index; void *private; @@ -261,6 +279,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio)); static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); +FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); @@ -284,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) static inline atomic_t *compound_pincount_ptr(struct page *page) { - return &page[2].hpage_pinned_refcount; + return &page[1].compound_pincount; } /* @@ -415,7 +434,10 @@ struct vm_area_struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; - /* Serialized by mmap_sem. */ + /* + * Serialized by mmap_sem. Never use directly because it is + * valid only when vm_file is NULL. Use anon_vma_name instead. + */ struct anon_vma_name *anon_name; }; @@ -630,7 +652,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif } __randomize_layout; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aed44e9b5d89..962b14d403e8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -83,6 +83,17 @@ static inline bool is_migrate_movable(int mt) return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; } +/* + * Check whether a migratetype can be merged with another migratetype. + * + * It is only mergeable when it can fall back to other migratetypes for + * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. + */ +static inline bool migratetype_is_mergeable(int mt) +{ + return mt < MIGRATE_PCPTYPES; +} + #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) @@ -211,6 +222,9 @@ enum node_stat_item { #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif +#ifdef CONFIG_NUMA_BALANCING + PGPROMOTE_SUCCESS, /* promote successfully */ +#endif NR_VM_NODE_STAT_ITEMS }; @@ -339,6 +353,7 @@ enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, + WMARK_PROMO, NR_WMARK }; @@ -920,12 +935,6 @@ typedef struct pglist_data { #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) -#ifdef CONFIG_FLATMEM -#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) -#else -#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) -#endif -#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) @@ -1101,7 +1110,6 @@ static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } -#define NODE_MEM_MAP(nid) mem_map #else /* CONFIG_NUMA */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4bb71979a8fd..5da5d990ff58 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -211,7 +211,7 @@ struct css_device_id { kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 9 +#define ACPI_ID_LEN 16 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; diff --git a/include/linux/mutex_api.h b/include/linux/mutex_api.h new file mode 100644 index 000000000000..85ab9491e13e --- /dev/null +++ b/include/linux/mutex_api.h @@ -0,0 +1 @@ +#include <linux/mutex.h> diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index 6b3267b49755..ed42bd5f639f 100644 --- a/include/linux/net/intel/i40e_client.h +++ b/include/linux/net/intel/i40e_client.h @@ -26,11 +26,6 @@ struct i40e_client_version { u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED -}; - enum i40e_client_instance_state { __I40E_CLIENT_INSTANCE_NONE, __I40E_CLIENT_INSTANCE_OPENED, @@ -190,11 +185,6 @@ struct i40e_client { const struct i40e_client_ops *ops; /* client ops provided by the client */ }; -static inline bool i40e_client_is_registered(struct i40e_client *client) -{ - return test_bit(__I40E_CLIENT_REGISTERED, &client->state); -} - void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client); void i40e_client_device_unregister(struct i40e_info *ldev); diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1289593411d3..1c1332e4df26 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -32,6 +32,8 @@ enum iidc_rdma_protocol { }; #define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_MAX_DSCP_MAPPING 64 +#define IIDC_DSCP_PFC_MODE 0x1 /* Struct to hold per RDMA Qset info */ struct iidc_rdma_qset_params { @@ -60,6 +62,8 @@ struct iidc_qos_params { u8 vport_relative_bw; u8 vport_priority_type; u8 num_tc; + u8 pfc_mode; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; }; struct iidc_event { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3213c7227b59..cd7a597c55b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -28,6 +28,7 @@ #include <linux/prefetch.h> #include <asm/cache.h> #include <asm/byteorder.h> +#include <asm/local.h> #include <linux/percpu.h> #include <linux/rculist.h> @@ -194,6 +195,14 @@ struct net_device_stats { unsigned long tx_compressed; }; +/* per-cpu stats, allocated on demand. + * Try to fit them in a single cache line, for dev_get_stats() sake. + */ +struct net_device_core_stats { + local_t rx_dropped; + local_t tx_dropped; + local_t rx_nohandler; +} __aligned(4 * sizeof(local_t)); #include <linux/cache.h> #include <linux/skbuff.h> @@ -1735,12 +1744,8 @@ enum netdev_ml_priv_type { * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * - * @rx_dropped: Dropped packets by core network, - * do not use this in drivers - * @tx_dropped: Dropped packets by core network, + * @core_stats: core networking counters, * do not use this in drivers - * @rx_nohandler: nohandler dropped packets by core network on - * inactive devices, do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * @@ -1894,6 +1899,8 @@ enum netdev_ml_priv_type { * @garp_port: GARP * @mrp_port: MRP * + * @dm_private: Drop monitor private + * * @dev: Class/net/name entry * @sysfs_groups: Space for optional device, statistics and wireless * sysfs groups @@ -1948,6 +1955,9 @@ enum netdev_ml_priv_type { * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. + * @dev_registered_tracker: tracker for reference held while + * registered + * @offload_xstats_l3: L3 HW stats for this netdevice. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2020,9 +2030,7 @@ struct net_device { struct net_device_stats stats; /* not used by modern drivers */ - atomic_long_t rx_dropped; - atomic_long_t tx_dropped; - atomic_long_t rx_nohandler; + struct net_device_core_stats __percpu *core_stats; /* Stats to monitor link on/off, flapping */ atomic_t carrier_up_count; @@ -2158,7 +2166,7 @@ struct net_device { struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; unsigned int real_num_tx_queues; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; @@ -2234,7 +2242,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; #endif - +#if IS_ENABLED(CONFIG_NET_DROP_MONITOR) + struct dm_hw_stat_delta __rcu *dm_private; +#endif struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -2282,6 +2292,8 @@ struct net_device { u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; + netdevice_tracker dev_registered_tracker; + struct rtnl_hw_stats64 *offload_xstats_l3; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2548,6 +2560,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; @@ -2721,6 +2734,10 @@ enum netdev_cmd { NETDEV_CVLAN_FILTER_DROP_INFO, NETDEV_SVLAN_FILTER_PUSH_INFO, NETDEV_SVLAN_FILTER_DROP_INFO, + NETDEV_OFFLOAD_XSTATS_ENABLE, + NETDEV_OFFLOAD_XSTATS_DISABLE, + NETDEV_OFFLOAD_XSTATS_REPORT_USED, + NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); @@ -2771,6 +2788,42 @@ struct netdev_notifier_pre_changeaddr_info { const unsigned char *dev_addr; }; +enum netdev_offload_xstats_type { + NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1, +}; + +struct netdev_notifier_offload_xstats_info { + struct netdev_notifier_info info; /* must be first */ + enum netdev_offload_xstats_type type; + + union { + /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */ + struct netdev_notifier_offload_xstats_rd *report_delta; + /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */ + struct netdev_notifier_offload_xstats_ru *report_used; + }; +}; + +int netdev_offload_xstats_enable(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct netlink_ext_ack *extack); +int netdev_offload_xstats_disable(struct net_device *dev, + enum netdev_offload_xstats_type type); +bool netdev_offload_xstats_enabled(const struct net_device *dev, + enum netdev_offload_xstats_type type); +int netdev_offload_xstats_get(struct net_device *dev, + enum netdev_offload_xstats_type type, + struct rtnl_hw_stats64 *stats, bool *used, + struct netlink_ext_ack *extack); +void +netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd, + const struct rtnl_hw_stats64 *stats); +void +netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru); +void netdev_offload_xstats_push_delta(struct net_device *dev, + enum netdev_offload_xstats_type type, + const struct rtnl_hw_stats64 *stats); + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { @@ -3613,7 +3666,6 @@ static inline unsigned int get_netdev_rx_queue_index( } #endif -#define DEFAULT_MAX_NUM_RSS_QUEUES (8) int netif_get_num_default_rss_queues(void); enum skb_free_reason { @@ -3668,8 +3720,8 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); -int netif_rx_ni(struct sk_buff *skb); -int netif_rx_any_context(struct sk_buff *skb); +int __netif_rx(struct sk_buff *skb); + int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); @@ -3791,13 +3843,42 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } +struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev); + +static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + + if (likely(p)) + return this_cpu_ptr(p); + + return netdev_core_stats_alloc(dev); +} + +#define DEV_CORE_STATS_INC(FIELD) \ +static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ +{ \ + struct net_device_core_stats *p; \ + \ + preempt_disable(); \ + p = dev_core_stats(dev); \ + \ + if (p) \ + local_inc(&p->FIELD); \ + preempt_enable(); \ +} +DEV_CORE_STATS_INC(rx_dropped) +DEV_CORE_STATS_INC(tx_dropped) +DEV_CORE_STATS_INC(rx_nohandler) + static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { - atomic_long_inc(&dev->rx_dropped); + dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); return NET_RX_DROP; } @@ -3816,14 +3897,7 @@ extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); -/** - * dev_put - release reference to device - * @dev: network device - * - * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. - */ -static inline void dev_put(struct net_device *dev) +static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3834,14 +3908,7 @@ static inline void dev_put(struct net_device *dev) } } -/** - * dev_hold - get reference to device - * @dev: network device - * - * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. - */ -static inline void dev_hold(struct net_device *dev) +static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT @@ -3852,11 +3919,24 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void __netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, + gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +/* netdev_tracker_alloc() can upgrade a prior untracked reference + * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. + */ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER - ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); + refcount_dec(&dev->refcnt_tracker.no_tracker); + __netdev_tracker_alloc(dev, tracker, gfp); #endif } @@ -3872,8 +3952,8 @@ static inline void dev_hold_track(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { - dev_hold(dev); - netdev_tracker_alloc(dev, tracker, gfp); + __dev_hold(dev); + __netdev_tracker_alloc(dev, tracker, gfp); } } @@ -3882,10 +3962,34 @@ static inline void dev_put_track(struct net_device *dev, { if (dev) { netdev_tracker_free(dev, tracker); - dev_put(dev); + __dev_put(dev); } } +/** + * dev_hold - get reference to device + * @dev: network device + * + * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. + */ +static inline void dev_hold(struct net_device *dev) +{ + dev_hold_track(dev, NULL, GFP_ATOMIC); +} + +/** + * dev_put - release reference to device + * @dev: network device + * + * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. + */ +static inline void dev_put(struct net_device *dev) +{ + dev_put_track(dev, NULL); +} + static inline void dev_replace_track(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, @@ -3894,11 +3998,11 @@ static inline void dev_replace_track(struct net_device *odev, if (odev) netdev_tracker_free(odev, tracker); - dev_hold(ndev); - dev_put(odev); + __dev_hold(ndev); + __dev_put(odev); if (ndev) - netdev_tracker_alloc(ndev, tracker, gfp); + __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on @@ -4601,6 +4705,8 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type); struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 15e71bfff726..c2c6f332fb90 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -379,6 +379,7 @@ struct nf_nat_hook { unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, enum nf_nat_manip_type mtype, enum ip_conntrack_dir dir); + void (*remove_nat_bysrc)(struct nf_conn *ct); }; extern const struct nf_nat_hook __rcu *nf_nat_hook; diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index a28aa289afdc..c3bdb4370938 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -300,26 +300,22 @@ union pptp_ctrl_union { struct PptpSetLinkInfo setlink; }; -extern int -(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); - -extern int -(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, - struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int protoff, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); - -extern void -(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *exp_orig, - struct nf_conntrack_expect *exp_reply); - -extern void -(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct, - struct nf_conntrack_expect *exp); +struct nf_nat_pptp_hook { + int (*outbound)(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + int (*inbound)(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + void (*exp_gre)(struct nf_conntrack_expect *exp_orig, + struct nf_conntrack_expect *exp_reply); + void (*expectfn)(struct nf_conn *ct, + struct nf_conntrack_expect *exp); +}; +extern const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook; #endif /* _NF_CONNTRACK_PPTP_H */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index b4dd96e4dc8d..e6487a691136 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -101,7 +101,11 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, nf_hook_state_init(&state, NF_NETDEV_EGRESS, NFPROTO_NETDEV, dev, NULL, NULL, dev_net(dev), NULL); + + /* nf assumes rcu_read_lock, not just read_lock_bh */ + rcu_read_lock(); ret = nf_hook_slow(skb, &state, e, 0); + rcu_read_unlock(); if (ret == 1) { return skb; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b46c39d98bbd..614f22213e21 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -244,6 +244,13 @@ struct netfs_cache_ops { int (*prepare_write)(struct netfs_cache_resources *cres, loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + + /* Query the occupancy of the cache in a region, returning where the + * next chunk of data starts and how long it is. + */ + int (*query_occupancy)(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len); }; struct readahead_control; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1ec631838af9..bda1c385cffb 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -135,15 +135,6 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, extack->cookie_len = sizeof(cookie); } -static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, - u32 cookie) -{ - if (!extack) - return; - memcpy(extack->cookie, &cookie, sizeof(cookie)); - extack->cookie_len = sizeof(cookie); -} - void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 0dc7ad38a0da..b06375e88e58 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } - -/* - * This is really a general kernel constant, but since nothing like - * this is defined in the kernel headers, I have to do it here. - */ -#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) - - enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 00835bacd236..784120cc217e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -61,7 +61,9 @@ struct nfs_access_entry { struct rb_node rb_node; struct list_head lru; - const struct cred * cred; + kuid_t fsuid; + kgid_t fsgid; + struct group_info *group_info; __u32 mask; struct rcu_head rcu_head; }; @@ -105,6 +107,7 @@ struct nfs_open_dir_context { __u64 dup_cookie; pgoff_t page_index; signed char duped; + bool eof; }; /* @@ -395,7 +398,7 @@ extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fa extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); @@ -532,8 +535,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); -extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, - bool may_block); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block); /* * linux/fs/nfs/symlink.c @@ -580,7 +583,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page *page); -extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); +int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 77b2dba27bbb..6aa2a200676a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -138,6 +138,7 @@ struct nfs_server { struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ atomic_long_t writeback; /* number of writeback pages */ + unsigned int write_congested;/* flag set when writeback gets too high */ unsigned int flags; /* various flags */ /* The following are for internal use only. Also see uapi/linux/nfs_mount.h */ @@ -266,6 +267,8 @@ struct nfs_server { #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) #define NFS_CAP_LGOPEN (1U << 5) +#define NFS_CAP_CASE_INSENSITIVE (1U << 6) +#define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) @@ -282,5 +285,5 @@ struct nfs_server { #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) #define NFS_CAP_READ_PLUS (1U << 29) - +#define NFS_CAP_FS_LOCATIONS (1U << 30) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 967a0098f0a9..728cb0c1f0b6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1194,6 +1194,8 @@ struct nfs4_server_caps_res { u32 has_links; u32 has_symlinks; u32 fh_expire_type; + u32 case_insensitive; + u32 case_preserving; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 @@ -1737,7 +1739,7 @@ struct nfs_rpc_ops { struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, struct nfs_access_entry *); + int (*access) (struct inode *, struct nfs_access_entry *, const struct cred *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, @@ -1795,6 +1797,7 @@ struct nfs_rpc_ops { struct nfs_server *(*create_server)(struct fs_context *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); + int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); }; /* diff --git a/include/linux/node.h b/include/linux/node.h index bb21fd631b16..40d641a8bfb0 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,19 +99,20 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) -void link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context); +void register_memory_blocks_under_node(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else -static inline void link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +static inline void register_memory_blocks_under_node(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context) { } #endif extern void unregister_node(struct node *node); #ifdef CONFIG_NUMA +extern void node_dev_init(void); /* Core of the node registration - only memory hotplug should use this */ extern int __register_one_node(int nid); @@ -128,8 +129,8 @@ static inline int register_one_node(int nid) error = __register_one_node(nid); if (error) return error; - /* link memory sections under this node */ - link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY); + register_memory_blocks_under_node(nid, start_pfn, end_pfn, + MEMINIT_EARLY); } return error; @@ -149,6 +150,9 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister, node_registration_func_t unregister); #endif #else +static inline void node_dev_init(void) +{ +} static inline int __register_one_node(int nid) { return 0; @@ -181,4 +185,9 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, #define to_node(device) container_of(device, struct node, dev) +static inline bool node_is_toptier(int node) +{ + return node_state(node, N_CPU); +} + #endif /* _LINUX_NODE_H_ */ diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index cb909edb76c4..5358a5facdee 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -721,7 +721,7 @@ enum { * * Fields with static values for the port. Initialized by the * port_info struct supplied to the registration call. - * @port_num: NVME-FC transport subsytem port number + * @port_num: NVME-FC transport subsystem port number * @node_name: FC WWNN for the port * @port_name: FC WWPN for the port * @private: pointer to memory allocated alongside the local port diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 959e0bd9a913..75470159a194 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -12,6 +12,7 @@ #define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 +#define NVME_TCP_MIN_MAXH2CDATA 4096 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84b..9dbc3ef4daf7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -43,6 +43,12 @@ enum nvme_ctrl_type { NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; +enum nvme_dctype { + NVME_DCTYPE_NOT_REPORTED = 0, + NVME_DCTYPE_DDC = 1, /* Direct Discovery Controller */ + NVME_DCTYPE_CDC = 2, /* Central Discovery Controller */ +}; + /* Address Family codes for Discovery Log Page entry ADRFAM field */ enum { NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ @@ -320,7 +326,9 @@ struct nvme_id_ctrl { __le16 icdoff; __u8 ctrattr; __u8 msdbd; - __u8 rsvd1804[244]; + __u8 rsvd1804[2]; + __u8 dctype; + __u8 rsvd1807[241]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -1636,6 +1644,7 @@ enum { NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, + NVME_SC_MORE = 0x2000, NVME_SC_DNR = 0x4000, }; diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 98efb7b5660d..c9a3ac9efeaa 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -70,7 +70,8 @@ struct nvmem_keepout { * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. * @priv: User context passed to read/write callbacks. - * @wp-gpio: Write protect pin + * @wp-gpio: Write protect pin + * @ignore_wp: Write Protect pin is managed by the provider. * * Note: A default "nvmem<id>" name will be assigned to the device if * no name is specified in its configuration. In such case "<id>" is @@ -92,6 +93,7 @@ struct nvmem_config { enum nvmem_type type; bool read_only; bool root_only; + bool ignore_wp; struct device_node *of_node; bool no_of_node; nvmem_reg_read_t reg_read; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 4669632bd72b..f1221d11f8e5 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -4,6 +4,7 @@ #include <linux/compiler.h> #include <linux/limits.h> +#include <linux/const.h> /* * We need to compute the minimum and maximum values representable in a given @@ -118,81 +119,94 @@ static inline bool __must_check __must_check_overflow(bool overflow) })) /** - * array_size() - Calculate size of 2-dimensional array. - * - * @a: dimension one - * @b: dimension two + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * - * Calculates size of 2-dimensional array: @a * @b. + * @factor1: first factor + * @factor2: second factor * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t array_size(size_t a, size_t b) +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) + if (check_mul_overflow(factor1, factor2, &bytes)) return SIZE_MAX; return bytes; } /** - * array3_size() - Calculate size of 3-dimensional array. + * size_add() - Calculate size_t addition with saturation at SIZE_MAX * - * @a: dimension one - * @b: dimension two - * @c: dimension three - * - * Calculates size of 3-dimensional array: @a * @b * @c. + * @addend1: first addend + * @addend2: second addend * - * Returns: number of bytes needed to represent the array or SIZE_MAX on - * overflow. + * Returns: calculate @addend1 + @addend2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_mul_overflow(bytes, c, &bytes)) + if (check_add_overflow(addend1, addend2, &bytes)) return SIZE_MAX; return bytes; } -/* - * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for - * struct_size() below. +/** + * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX + * + * @minuend: value to subtract from + * @subtrahend: value to subtract from @minuend + * + * Returns: calculate @minuend - @subtrahend, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. For + * composition with the size_add() and size_mul() helpers, neither + * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). + * The lvalue must be size_t to avoid implicit type conversion. */ -static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) +static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; - if (check_mul_overflow(a, b, &bytes)) - return SIZE_MAX; - if (check_add_overflow(bytes, c, &bytes)) + if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || + check_sub_overflow(minuend, subtrahend, &bytes)) return SIZE_MAX; return bytes; } /** - * struct_size() - Calculate size of structure with trailing array. - * @p: Pointer to the structure. - * @member: Name of the array member. - * @count: Number of elements in the array. + * array_size() - Calculate size of 2-dimensional array. * - * Calculates size of memory needed for structure @p followed by an - * array of @count number of @member elements. + * @a: dimension one + * @b: dimension two * - * Return: number of bytes needed or SIZE_MAX on overflow. + * Calculates size of 2-dimensional array: @a * @b. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. */ -#define struct_size(p, member, count) \ - __ab_c_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member),\ - sizeof(*(p))) +#define array_size(a, b) size_mul(a, b) + +/** + * array3_size() - Calculate size of 3-dimensional array. + * + * @a: dimension one + * @b: dimension two + * @c: dimension three + * + * Calculates size of 3-dimensional array: @a * @b * @c. + * + * Returns: number of bytes needed to represent the array or SIZE_MAX on + * overflow. + */ +#define array3_size(a, b, c) size_mul(size_mul(a, b), c) /** * flex_array_size() - Calculate size of a flexible array member @@ -208,7 +222,25 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * Return: number of bytes needed or SIZE_MAX on overflow. */ #define flex_array_size(p, member, count) \ - array_size(count, \ - sizeof(*(p)->member) + __must_be_array((p)->member)) + __builtin_choose_expr(__is_constexpr(count), \ + (count) * sizeof(*(p)->member) + __must_be_array((p)->member), \ + size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member))) + +/** + * struct_size() - Calculate size of structure with trailing flexible array. + * + * @p: Pointer to the structure. + * @member: Name of the array member. + * @count: Number of elements in the array. + * + * Calculates size of memory needed for structure @p followed by an + * array of @count number of @member elements. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size(p, member, count) \ + __builtin_choose_expr(__is_constexpr(count), \ + sizeof(*(p)) + flex_array_size(p, member, count), \ + size_add(sizeof(*(p)), flex_array_size(p, member, count))) #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c3b6e5c8bfd..9d8eeaa67d05 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,13 +190,81 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H +#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, + hugetlb_free_vmemmap_enabled_key); + +static __always_inline bool hugetlb_free_vmemmap_enabled(void) +{ + return static_branch_maybe(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON, + &hugetlb_free_vmemmap_enabled_key); +} + +/* + * If the feature of freeing some vmemmap pages associated with each HugeTLB + * page is enabled, the head vmemmap page frame is reused and all of the tail + * vmemmap addresses map to the head vmemmap page frame (furture details can + * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other + * words, there are more than one page struct with PG_head associated with each + * HugeTLB page. We __know__ that there is only one head page struct, the tail + * page structs with PG_head are fake head page structs. We need an approach + * to distinguish between those two different types of page structs so that + * compound_head() can return the real head page struct when the parameter is + * the tail page struct but with PG_head. + * + * The page_fixed_fake_head() returns the real head page struct if the @page is + * fake page head, otherwise, returns @page which can either be a true page + * head or tail. + */ +static __always_inline const struct page *page_fixed_fake_head(const struct page *page) +{ + if (!hugetlb_free_vmemmap_enabled()) + return page; + + /* + * Only addresses aligned with PAGE_SIZE of struct page may be fake head + * struct page. The alignment check aims to avoid access the fields ( + * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) + * cold cacheline in some cases. + */ + if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && + test_bit(PG_head, &page->flags)) { + /* + * We can safely access the field of the @page[1] with PG_head + * because the @page is a compound page composed with at least + * two contiguous pages. + */ + unsigned long head = READ_ONCE(page[1].compound_head); + + if (likely(head & 1)) + return (const struct page *)(head - 1); + } + return page; +} +#else +static inline const struct page *page_fixed_fake_head(const struct page *page) +{ + return page; +} + +static inline bool hugetlb_free_vmemmap_enabled(void) +{ + return false; +} +#endif + +static __always_inline int page_is_fake_head(struct page *page) +{ + return page_fixed_fake_head(page) != page; +} + static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) return head - 1; - return (unsigned long)page; + return (unsigned long)page_fixed_fake_head(page); } #define compound_head(page) ((typeof(page))_compound_head(page)) @@ -231,12 +299,13 @@ static inline unsigned long _compound_head(const struct page *page) static __always_inline int PageTail(struct page *page) { - return READ_ONCE(page->compound_head) & 1; + return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(struct page *page) { - return test_bit(PG_head, &page->flags) || PageTail(page); + return test_bit(PG_head, &page->flags) || + READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l @@ -412,7 +481,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) @@ -695,7 +764,20 @@ static inline bool test_set_page_writeback(struct page *page) return set_page_writeback(page); } -__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +static __always_inline bool folio_test_head(struct folio *folio) +{ + return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); +} + +static __always_inline int PageHead(struct page *page) +{ + PF_POISONED_CHECK(page); + return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); +} + +__SETPAGEFLAG(Head, head, PF_ANY) +__CLEARPAGEFLAG(Head, head, PF_ANY) +CLEARPAGEFLAG(Head, head, PF_ANY) /** * folio_test_large() - Does this folio contain more than one page? @@ -918,7 +1000,7 @@ PAGE_TYPE_OPS(Guard, guard) extern bool is_free_buddy_page(struct page *page); -__PAGEFLAG(Isolated, isolated, PF_ANY); +PAGEFLAG(Isolated, isolated, PF_ANY); #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 38cace1da7b6..01e16c7696ec 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { @@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, __page_table_check_pud_set(mm, addr, pudp, pud); } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear_range(mm, addr, pmd); +} + #else static inline void page_table_check_alloc(struct page *page, unsigned int order) @@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, { } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ +} + #endif /* CONFIG_PAGE_TABLE_CHECK */ #endif /* __LINUX_PAGE_TABLE_CHECK_H */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 973fd731a520..83c7248053a1 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -37,8 +37,11 @@ extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -/* Huge pages are a constant size */ -#define pageblock_order HUGETLB_PAGE_ORDER +/* + * Huge pages are a constant size, but don't exceed the maximum allocation + * granularity. + */ +#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER - 1) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 270bf5136c34..a8d0b327b066 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -18,6 +18,120 @@ struct folio_batch; +unsigned long invalidate_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end); + +static inline void invalidate_remote_inode(struct inode *inode) +{ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) + invalidate_mapping_pages(inode->i_mapping, 0, -1); +} +int invalidate_inode_pages2(struct address_space *mapping); +int invalidate_inode_pages2_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); +int write_inode_now(struct inode *, int sync); +int filemap_fdatawrite(struct address_space *); +int filemap_flush(struct address_space *); +int filemap_fdatawait_keep_errors(struct address_space *mapping); +int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +int filemap_fdatawait_range_keep_errors(struct address_space *mapping, + loff_t start_byte, loff_t end_byte); + +static inline int filemap_fdatawait(struct address_space *mapping) +{ + return filemap_fdatawait_range(mapping, 0, LLONG_MAX); +} + +bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); +int filemap_write_and_wait_range(struct address_space *mapping, + loff_t lstart, loff_t lend); +int __filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end, int sync_mode); +int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); +int filemap_check_errors(struct address_space *mapping); +void __filemap_set_wb_err(struct address_space *mapping, int err); +int filemap_fdatawrite_wbc(struct address_space *mapping, + struct writeback_control *wbc); + +static inline int filemap_write_and_wait(struct address_space *mapping) +{ + return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); +} + +/** + * filemap_set_wb_err - set a writeback error on an address_space + * @mapping: mapping in which to set writeback error + * @err: error to be set in mapping + * + * When writeback fails in some way, we must record that error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred. + * + * When a writeback error occurs, most filesystems will want to call + * filemap_set_wb_err to record the error in the mapping so that it will be + * automatically reported whenever fsync is called on the file. + */ +static inline void filemap_set_wb_err(struct address_space *mapping, int err) +{ + /* Fastpath for common case of no error */ + if (unlikely(err)) + __filemap_set_wb_err(mapping, err); +} + +/** + * filemap_check_wb_err - has an error occurred since the mark was sampled? + * @mapping: mapping to check for writeback errors + * @since: previously-sampled errseq_t + * + * Grab the errseq_t value from the mapping, and see if it has changed "since" + * the given value was sampled. + * + * If it has then report the latest error set, otherwise return 0. + */ +static inline int filemap_check_wb_err(struct address_space *mapping, + errseq_t since) +{ + return errseq_check(&mapping->wb_err, since); +} + +/** + * filemap_sample_wb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Writeback errors are always reported relative to a particular sample point + * in the past. This function provides those sample points. + */ +static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) +{ + return errseq_sample(&mapping->wb_err); +} + +/** + * file_sample_sb_err - sample the current errseq_t to test for later errors + * @file: file pointer to be sampled + * + * Grab the most current superblock-level errseq_t value for the given + * struct file. + */ +static inline errseq_t file_sample_sb_err(struct file *file) +{ + return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); +} + +/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + static inline bool mapping_empty(struct address_space *mapping) { return xa_empty(&mapping->i_pages); @@ -192,9 +306,14 @@ static inline void mapping_set_large_folios(struct address_space *mapping) __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } +/* + * Large folio support currently depends on THP. These dependencies are + * being worked on but are not yet fixed. + */ static inline bool mapping_large_folio_support(struct address_space *mapping) { - return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) @@ -212,7 +331,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping) if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else - WARN_ON_ONCE(1); + WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } @@ -222,7 +341,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else - WARN_ON_ONCE(1); + WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } @@ -283,16 +402,6 @@ static inline struct inode *folio_inode(struct folio *folio) return folio->mapping->host; } -static inline bool page_cache_add_speculative(struct page *page, int count) -{ - return folio_ref_try_add_rcu((struct folio *)page, count); -} - -static inline bool page_cache_get_speculative(struct page *page) -{ - return page_cache_add_speculative(page, 1); -} - /** * folio_attach_private - Attach private data to a folio. * @folio: Folio to attach data to. @@ -423,6 +532,24 @@ static inline struct folio *filemap_get_folio(struct address_space *mapping, } /** + * filemap_lock_folio - Find and lock a folio. + * @mapping: The address_space to search. + * @index: The page index. + * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned locked with an increased refcount. + * + * Context: May sleep. + * Return: A folio or %NULL if there is no folio in the cache for this + * index. Will not return a shadow, swap or DAX entry. + */ +static inline struct folio *filemap_lock_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, FGP_LOCK, 0); +} + +/** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index @@ -594,13 +721,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); -static inline unsigned find_get_pages(struct address_space *mapping, - pgoff_t *start, unsigned int nr_pages, - struct page **pages) -{ - return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages, - pages); -} unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, @@ -636,15 +756,15 @@ extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); static inline struct page *read_mapping_page(struct address_space *mapping, - pgoff_t index, void *data) + pgoff_t index, struct file *file) { - return read_cache_page(mapping, index, NULL, data); + return read_cache_page(mapping, index, NULL, file); } static inline struct folio *read_mapping_folio(struct address_space *mapping, - pgoff_t index, void *data) + pgoff_t index, struct file *file) { - return read_cache_folio(mapping, index, NULL, data); + return read_cache_folio(mapping, index, NULL, file); } /* @@ -713,6 +833,17 @@ static inline loff_t folio_file_pos(struct folio *folio) return page_file_offset(&folio->page); } +/* + * Get the offset in PAGE_SIZE (even for hugetlb folios). + * (TODO: hugetlb folios should have ->index in PAGE_SIZE) + */ +static inline pgoff_t folio_pgoff(struct folio *folio) +{ + if (unlikely(folio_test_hugetlb(folio))) + return hugetlb_basepage_index(&folio->page); + return folio->index; +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); @@ -878,8 +1009,7 @@ static inline void __set_page_dirty(struct page *page, { __folio_mark_dirty(page_folio(page), mapping, warn); } -void folio_account_cleaned(struct folio *folio, struct address_space *mapping, - struct bdi_writeback *wb); +void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { @@ -893,6 +1023,7 @@ static inline void cancel_dirty_page(struct page *page) } bool folio_clear_dirty_for_io(struct folio *folio); bool clear_page_dirty_for_io(struct page *page); +void folio_invalidate(struct folio *folio, size_t offset, size_t length); int __must_check folio_write_one(struct folio *folio); static inline int __must_check write_one_page(struct page *page) { @@ -900,7 +1031,7 @@ static inline int __must_check write_one_page(struct page *page) } int __set_page_dirty_nobuffers(struct page *page); -int __set_page_dirty_no_writeback(struct page *page); +bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); void page_endio(struct page *page, bool is_write, int err); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 6f7949b2fd8d..abeba356bc3f 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -2,7 +2,7 @@ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H -#include <linux/genhd.h> +#include <linux/blkdev.h> #include <asm/local.h> struct disk_stats { diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c..b957eeb89c7a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -668,6 +668,7 @@ struct pci_bus { struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; + unsigned int unsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) @@ -2166,7 +2167,8 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar); #ifdef CONFIG_PCI_IOV int pci_iov_virtfn_bus(struct pci_dev *dev, int id); int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); - +int pci_iov_vf_id(struct pci_dev *dev); +void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); @@ -2194,6 +2196,18 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) { return -ENOSYS; } + +static inline int pci_iov_vf_id(struct pci_dev *dev) +{ + return -ENOSYS; +} + +static inline void *pci_iov_get_pf_drvdata(struct pci_dev *dev, + struct pci_driver *pf_driver) +{ + return ERR_PTR(-EINVAL); +} + static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index aad54c666407..0178823ce8c2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -60,6 +60,8 @@ #define PCI_CLASS_BRIDGE_EISA 0x0602 #define PCI_CLASS_BRIDGE_MC 0x0603 #define PCI_CLASS_BRIDGE_PCI 0x0604 +#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400 +#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401 #define PCI_CLASS_BRIDGE_PCMCIA 0x0605 #define PCI_CLASS_BRIDGE_NUBUS 0x0606 #define PCI_CLASS_BRIDGE_CARDBUS 0x0607 @@ -2529,11 +2531,16 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff #define PCI_VENDOR_ID_HUAWEI 0x19e5 +#define PCI_DEVICE_ID_HUAWEI_ZIP_VF 0xa251 +#define PCI_DEVICE_ID_HUAWEI_SEC_VF 0xa256 +#define PCI_DEVICE_ID_HUAWEI_HPRE_VF 0xa259 #define PCI_VENDOR_ID_NETRONOME 0x19ee +#define PCI_DEVICE_ID_NETRONOME_NFP3800 0x3800 #define PCI_DEVICE_ID_NETRONOME_NFP4000 0x4000 #define PCI_DEVICE_ID_NETRONOME_NFP5000 0x5000 #define PCI_DEVICE_ID_NETRONOME_NFP6000 0x6000 +#define PCI_DEVICE_ID_NETRONOME_NFP3800_VF 0x3803 #define PCI_DEVICE_ID_NETRONOME_NFP6000_VF 0x6003 #define PCI_VENDOR_ID_QMI 0x1a32 @@ -2561,6 +2568,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_FUNGIBLE 0x1dad + #define PCI_VENDOR_ID_HXT 0x1dbf #define PCI_VENDOR_ID_TEKRAM 0x1de1 diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index add077a81b21..266eb26fb029 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -31,8 +31,7 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, unsigned int mode); -void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported, - struct phylink_link_state *state); +void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2512e2f9cd4e..0407a38b470a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -26,6 +26,8 @@ */ /* Event uses a 64bit counter */ #define ARMPMU_EVT_64BIT 1 +/* Event uses a 47bit counter */ +#define ARMPMU_EVT_47BIT 2 #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h new file mode 100644 index 000000000000..46f9b6fe306e --- /dev/null +++ b/include/linux/perf/riscv_pmu.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 SiFive + * Copyright (C) 2018 Andes Technology Corporation + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + */ + +#ifndef _ASM_RISCV_PERF_EVENT_H +#define _ASM_RISCV_PERF_EVENT_H + +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> + +#ifdef CONFIG_RISCV_PMU + +/* + * The RISCV_MAX_COUNTERS parameter should be specified. + */ + +#define RISCV_MAX_COUNTERS 64 +#define RISCV_OP_UNSUPP (-EOPNOTSUPP) +#define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" + +#define RISCV_PMU_STOP_FLAG_RESET 1 + +struct cpu_hw_events { + /* currently enabled events */ + int n_events; + /* Counter overflow interrupt */ + int irq; + /* currently enabled events */ + struct perf_event *events[RISCV_MAX_COUNTERS]; + /* currently enabled hardware counters */ + DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS); + /* currently enabled firmware counters */ + DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS); +}; + +struct riscv_pmu { + struct pmu pmu; + char *name; + + irqreturn_t (*handle_irq)(int irq_num, void *dev); + + int num_counters; + u64 (*ctr_read)(struct perf_event *event); + int (*ctr_get_idx)(struct perf_event *event); + int (*ctr_get_width)(int idx); + void (*ctr_clear_idx)(struct perf_event *event); + void (*ctr_start)(struct perf_event *event, u64 init_val); + void (*ctr_stop)(struct perf_event *event, unsigned long flag); + int (*event_map)(struct perf_event *event, u64 *config); + + struct cpu_hw_events __percpu *hw_events; + struct hlist_node node; +}; + +#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) +unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); +int riscv_pmu_event_set_period(struct perf_event *event); +uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); +u64 riscv_pmu_event_update(struct perf_event *event); +#ifdef CONFIG_RISCV_PMU_LEGACY +void riscv_pmu_legacy_skip_init(void); +#else +static inline void riscv_pmu_legacy_skip_init(void) {}; +#endif +struct riscv_pmu *riscv_pmu_alloc(void); + +#endif /* CONFIG_RISCV_PMU */ + +#endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 117f230bcdfd..af97dd427501 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -693,18 +693,6 @@ struct perf_event { u64 total_time_running; u64 tstamp; - /* - * timestamp shadows the actual context timing but it can - * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in, - * or when ctx_sched_in failed to schedule the event because we - * run out of PMC. - * - * ctx_time already accounts for ctx->timestamp. Therefore to - * compute ctx_time for a sample, simply add perf_clock(). - */ - u64 shadow_ctx_time; - struct perf_event_attr attr; u16 header_size; u16 id_header_size; @@ -852,6 +840,7 @@ struct perf_event_context { */ u64 time; u64 timestamp; + u64 timeoffset; /* * These fields let us detect when two contexts have both @@ -875,7 +864,7 @@ struct perf_event_context { #define PERF_NR_CONTEXTS 4 /** - * struct perf_event_cpu_context - per cpu event context structure + * struct perf_cpu_context - per cpu event context structure */ struct perf_cpu_context { struct perf_event_context ctx; @@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern { struct perf_cgroup_info { u64 time; u64 timestamp; + u64 timeoffset; + int active; }; struct perf_cgroup { diff --git a/include/linux/perf_event_api.h b/include/linux/perf_event_api.h new file mode 100644 index 000000000000..c2fd6048b790 --- /dev/null +++ b/include/linux/perf_event_api.h @@ -0,0 +1 @@ +#include <linux/perf_event.h> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index bc8713a76e03..f4f4077b97aa 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) diff --git a/include/linux/pgtable_api.h b/include/linux/pgtable_api.h new file mode 100644 index 000000000000..ff367a4ba8c4 --- /dev/null +++ b/include/linux/pgtable_api.h @@ -0,0 +1 @@ +#include <linux/pgtable.h> diff --git a/include/linux/phy.h b/include/linux/phy.h index 6de8d7a90d78..36ca2b5c2253 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -87,8 +87,8 @@ extern const int phy_10gbit_features_array[1]; * * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined - * @PHY_INTERFACE_MODE_MII: Median-independent interface - * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface + * @PHY_INTERFACE_MODE_MII: Media-independent interface + * @PHY_INTERFACE_MODE_GMII: Gigabit media-independent interface * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface @@ -1578,6 +1578,7 @@ int genphy_update_link(struct phy_device *phydev); int genphy_read_lpa(struct phy_device *phydev); int genphy_read_status_fixed(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); +int genphy_read_master_slave(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_loopback(struct phy_device *phydev, bool enable); @@ -1661,7 +1662,7 @@ int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); +void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 713a0c928b7c..223781622b33 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -86,7 +86,6 @@ enum phylink_op_type { * @type: operation type of PHYLINK instance * @legacy_pre_march2020: driver has not been updated for March 2020 updates * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") - * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND @@ -100,7 +99,6 @@ struct phylink_config { struct device *dev; enum phylink_op_type type; bool legacy_pre_march2020; - bool pcs_poll; bool poll_fixed_state; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, @@ -534,7 +532,6 @@ void phylink_generic_validate(struct phylink_config *config, struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); -void phylink_set_pcs(struct phylink *, struct phylink_pcs *pcs); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); @@ -582,7 +579,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c7e627503d2..07481bb87d4e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -86,4 +86,9 @@ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) +{ + return task_active_pid_ns(tsk) == &init_pid_ns; +} + #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index 2b5676ff35be..f922a192fe58 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -178,7 +178,7 @@ struct brcmfmac_platform_data { void (*power_off)(void); char *fw_alternative_path; int device_count; - struct brcmfmac_pd_device devices[0]; + struct brcmfmac_pd_device devices[]; }; diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 95e7e5667291..c23554531961 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5708,7 +5708,7 @@ struct ec_response_typec_discovery { uint8_t svid_count; /* Number of SVIDs partner sent */ uint16_t reserved; uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ - struct svid_mode_info svids[0]; + struct svid_mode_info svids[]; } __ec_align1; /* USB Type-C commands for AP-controlled device policy. */ diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h deleted file mode 100644 index 114b0940729f..000000000000 --- a/include/linux/platform_data/eth_ixp4xx.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_ETH_IXP4XX -#define __PLATFORM_DATA_ETH_IXP4XX - -#include <linux/types.h> - -#define IXP4XX_ETH_NPEA 0x00 -#define IXP4XX_ETH_NPEB 0x10 -#define IXP4XX_ETH_NPEC 0x20 - -/* Information about built-in Ethernet MAC interfaces */ -struct eth_plat_info { - u8 phy; /* MII PHY ID, 0 - 31 */ - u8 rxq; /* configurable, currently 0 - 31 only */ - u8 txreadyq; - u8 hwaddr[6]; - u8 npe; /* NPE instance used by this interface */ - bool has_mdio; /* If this instance has an MDIO bus */ -}; - -#endif diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 773daf7915a3..5df1ace6d2c9 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -16,7 +16,6 @@ struct platform_device; * struct s3c64xx_spi_csinfo - ChipSelect description * @fb_delay: Slave specific feedback delay. * Refer to FB_CLK_SEL register definition in SPI chapter. - * @line: Custom 'identity' of the CS line. * * This is per SPI-Slave Chipselect information. * Allocate and initialize one in machine init code and make the @@ -24,7 +23,6 @@ struct platform_device; */ struct s3c64xx_spi_csinfo { u8 fb_delay; - unsigned line; }; /** @@ -43,26 +41,16 @@ struct s3c64xx_spi_info { /** * s3c64xx_spi_set_platdata - SPI Controller configure callback by the board * initialization code. - * @cfg_gpio: Pointer to gpio setup function. * @src_clk_nr: Clock the SPI controller is to use to generate SPI clocks. * @num_cs: Number of elements in the 'cs' array. * * Call this from machine init code for each SPI Controller that * has some chips attached to it. */ -extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); -extern void s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); -extern void s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs); +extern void s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs); /* defined by architecture to configure gpio */ extern int s3c64xx_spi0_cfg_gpio(void); -extern int s3c64xx_spi1_cfg_gpio(void); -extern int s3c64xx_spi2_cfg_gpio(void); extern struct s3c64xx_spi_info s3c64xx_spi0_pdata; -extern struct s3c64xx_spi_info s3c64xx_spi1_pdata; -extern struct s3c64xx_spi_info s3c64xx_spi2_pdata; #endif /*__SPI_S3C64XX_H */ diff --git a/include/linux/platform_data/wan_ixp4xx_hss.h b/include/linux/platform_data/wan_ixp4xx_hss.h deleted file mode 100644 index d525a0feb9e1..000000000000 --- a/include/linux/platform_data/wan_ixp4xx_hss.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_WAN_IXP4XX_HSS_H -#define __PLATFORM_DATA_WAN_IXP4XX_HSS_H - -#include <linux/types.h> - -/* Information about built-in HSS (synchronous serial) interfaces */ -struct hss_plat_info { - int (*set_clock)(int port, unsigned int clock_type); - int (*open)(int port, void *pdev, - void (*set_carrier_cb)(void *pdev, int carrier)); - void (*close)(int port, void *pdev); - u8 txreadyq; - u32 timer_freq; -}; - -#endif diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/spi-intel.h index 7f53a5c6f35e..a512ec37abbb 100644 --- a/include/linux/platform_data/x86/intel-spi.h +++ b/include/linux/platform_data/x86/spi-intel.h @@ -6,8 +6,8 @@ * Author: Mika Westerberg <mika.westerberg@linux.intel.com> */ -#ifndef INTEL_SPI_PDATA_H -#define INTEL_SPI_PDATA_H +#ifndef SPI_INTEL_PDATA_H +#define SPI_INTEL_PDATA_H enum intel_spi_type { INTEL_SPI_BYT = 1, @@ -19,11 +19,13 @@ enum intel_spi_type { /** * struct intel_spi_boardinfo - Board specific data for Intel SPI driver * @type: Type which this controller is compatible with - * @writeable: The chip is writeable + * @set_writeable: Try to make the chip writeable (optional) + * @data: Data to be passed to @set_writeable can be %NULL */ struct intel_spi_boardinfo { enum intel_spi_type type; - bool writeable; + bool (*set_writeable)(void __iomem *base, void *data); + void *data; }; -#endif /* INTEL_SPI_PDATA_H */ +#endif /* SPI_INTEL_PDATA_H */ diff --git a/include/linux/pm.h b/include/linux/pm.h index f7d2be686359..e65b3ab28377 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state); extern int dpm_suspend(pm_message_t state); extern int dpm_prepare(pm_message_t state); -extern void __suspend_report_result(const char *function, void *fn, int ret); +extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret); -#define suspend_report_result(fn, ret) \ +#define suspend_report_result(dev, fn, ret) \ do { \ - __suspend_report_result(__func__, fn, ret); \ + __suspend_report_result(__func__, dev, fn, ret); \ } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); @@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state) return 0; } -#define suspend_report_result(fn, ret) do {} while (0) +#define suspend_report_result(dev, fn, ret) do {} while (0) static inline int device_pm_wait_for_dev(struct device *a, struct device *b) { diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9f09601c465a..2bff6a10095d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev) * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). + * + * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend() + * at driver exit time unless your driver initially enabled pm_runtime + * with devm_pm_runtime_enable() (which handles it for you). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..9cf126c3b27f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -253,7 +253,7 @@ void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, u64 *newval, u64 *oldval); -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct kernel_siginfo *info); #endif diff --git a/include/linux/psi.h b/include/linux/psi.h index a70ca833c6d7..89784763d19e 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -6,6 +6,7 @@ #include <linux/psi_types.h> #include <linux/sched.h> #include <linux/poll.h> +#include <linux/cgroup-defs.h> struct seq_file; struct css_set; @@ -25,18 +26,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 516c0fe836fd..c7fe7c089718 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -142,8 +142,8 @@ struct psi_trigger { */ u64 last_event_time; - /* Refcounting to prevent premature destruction */ - struct kref refcount; + /* Deferred event(s) from previous ratelimit window */ + bool pending_event; }; struct psi_group { diff --git a/include/linux/pstore.h b/include/linux/pstore.h index eb93a54cff31..e97a8188f0fd 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -14,7 +14,7 @@ #include <linux/errno.h> #include <linux/kmsg_dump.h> #include <linux/mutex.h> -#include <linux/semaphore.h> +#include <linux/spinlock.h> #include <linux/time.h> #include <linux/types.h> @@ -87,7 +87,7 @@ struct pstore_record { * @owner: module which is responsible for this backend driver * @name: name of the backend driver * - * @buf_lock: semaphore to serialize access to @buf + * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer * @bufsize: size of @buf available for crash dump bytes (must match * smallest number of bytes available for writing to a @@ -178,7 +178,7 @@ struct pstore_info { struct module *owner; const char *name; - struct semaphore buf_lock; + spinlock_t buf_lock; char *buf; size_t bufsize; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 9afd34a2d36c..fefa7790dc46 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -126,6 +126,17 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, return msgtype; } +/** + * ptp_msg_is_sync - Evaluates whether the given skb is a PTP Sync message + * @skb: packet buffer + * @type: type of the packet (see ptp_classify_raw()) + * + * This function evaluates whether the given skb is a PTP Sync message. + * + * Return: true if sync message, false otherwise + */ +bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type); + void __init ptp_classifier_init(void); #else static inline void ptp_classifier_init(void) @@ -148,5 +159,9 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, */ return PTP_MSGTYPE_SYNC; } +static inline bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type) +{ + return false; +} #endif #endif /* _PTP_CLASSIFY_H_ */ diff --git a/include/linux/ptrace_api.h b/include/linux/ptrace_api.h new file mode 100644 index 000000000000..26e7d275ad8d --- /dev/null +++ b/include/linux/ptrace_api.h @@ -0,0 +1 @@ +#include <linux/ptrace.h> diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 81cad9e1e412..f8335644a01a 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -63,13 +63,21 @@ enum qcom_scm_ice_cipher { extern bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); -extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); +extern int qcom_scm_set_cold_boot_addr(void *entry); +extern int qcom_scm_set_warm_boot_addr(void *entry); extern void qcom_scm_cpu_power_down(u32 flags); extern int qcom_scm_set_remote_state(u32 state, u32 id); +struct qcom_scm_pas_metadata { + void *ptr; + dma_addr_t phys; + ssize_t size; +}; + extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size); + size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); extern int qcom_scm_pas_auth_and_reset(u32 peripheral); @@ -83,6 +91,7 @@ extern bool qcom_scm_restore_sec_cfg_available(void); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, u32 cp_nonpixel_start, u32 cp_nonpixel_size); @@ -107,6 +116,7 @@ extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); +extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, diff --git a/include/linux/quota.h b/include/linux/quota.h index 18ebd39c9487..fd692b4a41d5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid); * * When there is no mapping defined for the user-namespace, type, * qid tuple an invalid kqid is returned. Callers are expected to - * test for and handle handle invalid kqids being returned. + * test for and handle invalid kqids being returned. * Invalid kqids may be tested for using qid_valid(). */ static inline struct kqid make_kqid(struct user_namespace *from, diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 2a9fee8ddae3..51b811b62322 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -11,13 +11,20 @@ struct xor_block_template { struct xor_block_template *next; const char *name; int speed; - void (*do_2)(unsigned long, unsigned long *, unsigned long *); - void (*do_3)(unsigned long, unsigned long *, unsigned long *, - unsigned long *); - void (*do_4)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); - void (*do_5)(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); + void (*do_2)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_3)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_4)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_5)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); }; #endif diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51f..f673fbb838b3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/linux/random.h - * - * Include file for the random number generator. - */ + #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H @@ -14,14 +10,10 @@ #include <uapi/linux/random.h> -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +extern void add_device_randomness(const void *, size_t); +extern void add_bootloader_randomness(const void *, size_t); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) @@ -36,14 +28,24 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; extern void add_interrupt_randomness(int irq) __latent_entropy; +extern void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy); +#if IS_ENABLED(CONFIG_VMGENID) +extern void add_vmfork_randomness(const void *unique_vm_id, size_t size); +extern int register_random_vmfork_notifier(struct notifier_block *nb); +extern int unregister_random_vmfork_notifier(struct notifier_block *nb); +#else +static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; } +static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; } +#endif -extern void get_random_bytes(void *buf, int nbytes); +extern void get_random_bytes(void *buf, size_t nbytes); extern int wait_for_random_bytes(void); extern int __init rand_initialize(void); extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +extern int register_random_ready_notifier(struct notifier_block *nb); +extern int unregister_random_ready_notifier(struct notifier_block *nb); +extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; @@ -87,7 +89,7 @@ static inline unsigned long get_random_canary(void) /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); @@ -158,4 +160,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +extern int random_prepare_cpu(unsigned int cpu); +extern int random_online_cpu(unsigned int cpu); +#endif + #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index bebc911161b6..1468caf001c0 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -2,6 +2,7 @@ #ifndef _LINUX_RANDOMIZE_KSTACK_H #define _LINUX_RANDOMIZE_KSTACK_H +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET #include <linux/kernel.h> #include <linux/jump_label.h> #include <linux/percpu-defs.h> @@ -16,8 +17,20 @@ DECLARE_PER_CPU(u32, kstack_offset); * alignment. Also, since this use is being explicitly masked to a max of * 10 bits, stack-clash style attacks are unlikely. For more details see * "VLAs" in Documentation/process/deprecated.rst + * + * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently + * only with Clang and not GCC). Initializing the unused area on each syscall + * entry is expensive, and generating an implicit call to memset() may also be + * problematic (such as in noinstr functions). Therefore, if the compiler + * supports it (which it should if it initializes allocas), always use the + * "uninitialized" variant of the builtin. */ -void *__builtin_alloca(size_t size); +#if __has_builtin(__builtin_alloca_uninitialized) +#define __kstack_alloca __builtin_alloca_uninitialized +#else +#define __kstack_alloca __builtin_alloca +#endif + /* * Use, at most, 10 bits of entropy. We explicitly cap this to keep the * "VLA" from being unbounded (see above). 10 bits leaves enough room for @@ -36,7 +49,7 @@ void *__builtin_alloca(size_t size); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ @@ -50,5 +63,9 @@ void *__builtin_alloca(size_t size); raw_cpu_write(kstack_offset, offset); \ } \ } while (0) +#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ +#define add_random_kstack_offset() do { } while (0) +#define choose_random_kstack_offset(rand) do { } while (0) +#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 88b42eb46406..e7c39c200e2b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); @@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * kvfree_rcu(ptr); * - * where @ptr is a pointer to kvfree(). + * where @ptr is the pointer to be freed by kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946..5fed476f977f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d669400..9c6cfb742504 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* @@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { } void exit_rcu(void); void rcu_scheduler_starting(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 61c56cca95c4..8052d34da782 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w) rcu_assign_pointer(w->task, current); } -static inline void finish_rcuwait(struct rcuwait *w) -{ - rcu_assign_pointer(w->task, NULL); - __set_current_state(TASK_RUNNING); -} +extern void finish_rcuwait(struct rcuwait *w); #define rcuwait_wait_event(w, condition, state) \ ({ \ diff --git a/include/linux/rcuwait_api.h b/include/linux/rcuwait_api.h new file mode 100644 index 000000000000..f962e28544dd --- /dev/null +++ b/include/linux/rcuwait_api.h @@ -0,0 +1 @@ +#include <linux/rcuwait.h> diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 60f3453be23e..9ca353ab712b 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,8 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + refcount_t no_tracker; + bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ #endif @@ -26,7 +28,9 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, INIT_LIST_HEAD(&dir->quarantine); spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; + dir->dead = false; refcount_set(&dir->untracked, 1); + refcount_set(&dir->no_tracker, 1); stack_depot_init(); } diff --git a/include/linux/refcount_api.h b/include/linux/refcount_api.h new file mode 100644 index 000000000000..5f032589f568 --- /dev/null +++ b/include/linux/refcount_api.h @@ -0,0 +1 @@ +#include <linux/refcount.h> diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 22652e5fbc38..de81a94d7b30 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -237,6 +237,10 @@ typedef void (*regmap_unlock)(void *); * @reg_stride: The register address stride. Valid register addresses are a * multiple of this value. If set to 0, a value of 1 will be * used. + * @reg_downshift: The number of bits to downshift the register before + * performing any operations. + * @reg_base: Value to be added to every register address before performing any + * operation. * @pad_bits: Number of bits of padding between register and value. * @val_bits: Number of bits in a register value, mandatory. * @@ -360,6 +364,8 @@ struct regmap_config { int reg_bits; int reg_stride; + int reg_downshift; + unsigned int reg_base; int pad_bits; int val_bits; diff --git a/include/linux/resource.h b/include/linux/resource.h index bdf491cbcab7..4fdbc0c3f315 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -8,7 +8,5 @@ struct task_struct; void getrusage(struct task_struct *p, int who, struct rusage *ru); -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim); #endif diff --git a/include/linux/rethook.h b/include/linux/rethook.h new file mode 100644 index 000000000000..c8ac1e5afcd1 --- /dev/null +++ b/include/linux/rethook.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Return hooking with list-based shadow stack. + */ +#ifndef _LINUX_RETHOOK_H +#define _LINUX_RETHOOK_H + +#include <linux/compiler.h> +#include <linux/freelist.h> +#include <linux/kallsyms.h> +#include <linux/llist.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> + +struct rethook_node; + +typedef void (*rethook_handler_t) (struct rethook_node *, void *, struct pt_regs *); + +/** + * struct rethook - The rethook management data structure. + * @data: The user-defined data storage. + * @handler: The user-defined return hook handler. + * @pool: The pool of struct rethook_node. + * @ref: The reference counter. + * @rcu: The rcu_head for deferred freeing. + * + * Don't embed to another data structure, because this is a self-destructive + * data structure when all rethook_node are freed. + */ +struct rethook { + void *data; + rethook_handler_t handler; + struct freelist_head pool; + refcount_t ref; + struct rcu_head rcu; +}; + +/** + * struct rethook_node - The rethook shadow-stack entry node. + * @freelist: The freelist, linked to struct rethook::pool. + * @rcu: The rcu_head for deferred freeing. + * @llist: The llist, linked to a struct task_struct::rethooks. + * @rethook: The pointer to the struct rethook. + * @ret_addr: The storage for the real return address. + * @frame: The storage for the frame pointer. + * + * You can embed this to your extended data structure to store any data + * on each entry of the shadow stack. + */ +struct rethook_node { + union { + struct freelist_node freelist; + struct rcu_head rcu; + }; + struct llist_node llist; + struct rethook *rethook; + unsigned long ret_addr; + unsigned long frame; +}; + +struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +void rethook_free(struct rethook *rh); +void rethook_add_node(struct rethook *rh, struct rethook_node *node); +struct rethook_node *rethook_try_get(struct rethook *rh); +void rethook_recycle(struct rethook_node *node); +void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); +unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame, + struct llist_node **cur); + +/* Arch dependent code must implement arch_* and trampoline code */ +void arch_rethook_prepare(struct rethook_node *node, struct pt_regs *regs, bool mcount); +void arch_rethook_trampoline(void); + +/** + * is_rethook_trampoline() - Check whether the address is rethook trampoline + * @addr: The address to be checked + * + * Return true if the @addr is the rethook trampoline address. + */ +static inline bool is_rethook_trampoline(unsigned long addr) +{ + return addr == (unsigned long)dereference_symbol_descriptor(arch_rethook_trampoline); +} + +/* If the architecture needs to fixup the return address, implement it. */ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr); + +/* Generic trampoline handler, arch code must prepare asm stub */ +unsigned long rethook_trampoline_handler(struct pt_regs *regs, + unsigned long frame); + +#ifdef CONFIG_RETHOOK +void rethook_flush_task(struct task_struct *tk); +#else +#define rethook_flush_task(tsk) do { } while (0) +#endif + +#endif + diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c35f3962dc4f..373003ace639 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -308,6 +308,11 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) return false; } +static inline bool rfkill_soft_blocked(struct rfkill *rfkill) +{ + return false; +} + static inline enum rfkill_type rfkill_find_type(const char *name) { return RFKILL_TYPE_ALL; diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e704b1a4c06c..17230c458341 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -11,6 +11,7 @@ #include <linux/rwsem.h> #include <linux/memcontrol.h> #include <linux/highmem.h> +#include <linux/pagemap.h> /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -167,18 +168,19 @@ struct anon_vma *page_get_anon_vma(struct page *page); */ void page_move_anon_rmap(struct page *, struct vm_area_struct *); void page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, bool); + unsigned long address, bool compound); void do_page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, int); + unsigned long address, int flags); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long, bool); -void page_add_file_rmap(struct page *, bool); -void page_remove_rmap(struct page *, bool); - + unsigned long address, bool compound); +void page_add_file_rmap(struct page *, struct vm_area_struct *, + bool compound); +void page_remove_rmap(struct page *, struct vm_area_struct *, + bool compound); void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long); + unsigned long address); void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long); + unsigned long address); static inline void page_dup_rmap(struct page *page, bool compound) { @@ -188,11 +190,11 @@ static inline void page_dup_rmap(struct page *page, bool compound) /* * Called from mm/vmscan.c to handle paging out */ -int page_referenced(struct page *, int is_locked, +int folio_referenced(struct folio *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -void try_to_migrate(struct page *page, enum ttu_flags flags); -void try_to_unmap(struct page *, enum ttu_flags flags); +void try_to_migrate(struct folio *folio, enum ttu_flags flags); +void try_to_unmap(struct folio *, enum ttu_flags flags); int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct page **pages, @@ -200,11 +202,13 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) -/* Look for migarion entries rather than present PTEs */ +/* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) struct page_vma_mapped_walk { - struct page *page; + unsigned long pfn; + unsigned long nr_pages; + pgoff_t pgoff; struct vm_area_struct *vma; unsigned long address; pmd_t *pmd; @@ -213,10 +217,30 @@ struct page_vma_mapped_walk { unsigned int flags; }; +#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ + struct page_vma_mapped_walk name = { \ + .pfn = page_to_pfn(_page), \ + .nr_pages = compound_nr(page), \ + .pgoff = page_to_pgoff(page), \ + .vma = _vma, \ + .address = _address, \ + .flags = _flags, \ + } + +#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ + struct page_vma_mapped_walk name = { \ + .pfn = folio_pfn(_folio), \ + .nr_pages = folio_nr_pages(_folio), \ + .pgoff = folio_pgoff(_folio), \ + .vma = _vma, \ + .address = _address, \ + .flags = _flags, \ + } + static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) { /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */ - if (pvmw->pte && !PageHuge(pvmw->page)) + if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma)) pte_unmap(pvmw->pte); if (pvmw->ptl) spin_unlock(pvmw->ptl); @@ -237,18 +261,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int folio_mkclean(struct folio *); -/* - * called in munlock()/munmap() path to check for other vmas holding - * the page mlocked. - */ -void page_mlock(struct page *page); - -void remove_migration_ptes(struct page *old, struct page *new, bool locked); +void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); /* * Called by memory-failure.c to kill processes. */ -struct anon_vma *page_lock_anon_vma_read(struct page *page); +struct anon_vma *folio_lock_anon_vma_read(struct folio *folio); void page_unlock_anon_vma_read(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); @@ -267,15 +285,15 @@ struct rmap_walk_control { * Return false if page table scanning in rmap_walk should be stopped. * Otherwise, return true. */ - bool (*rmap_one)(struct page *page, struct vm_area_struct *vma, + bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); - int (*done)(struct page *page); - struct anon_vma *(*anon_lock)(struct page *page); + int (*done)(struct folio *folio); + struct anon_vma *(*anon_lock)(struct folio *folio); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -void rmap_walk(struct page *page, struct rmap_walk_control *rwc); -void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc); +void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ @@ -283,7 +301,7 @@ void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #define anon_vma_prepare(vma) (0) #define anon_vma_link(vma) do {} while (0) -static inline int page_referenced(struct page *page, int is_locked, +static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { @@ -291,7 +309,7 @@ static inline int page_referenced(struct page *page, int is_locked, return 0; } -static inline void try_to_unmap(struct page *page, enum ttu_flags flags) +static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags) { } diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bb9cb84114c1..7f970b16da3a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -134,4 +134,7 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); + +extern void rtnl_offload_xstats_notify(struct net_device *dev); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index f9348769e558..efa5c324369a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -230,7 +230,7 @@ extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map * do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ -} while (0); +} while (0) /* * Take/release a lock when not the owner will release it. diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 95df357ec009..dffeb8281c2d 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -28,14 +28,9 @@ struct seq_file; */ struct sbitmap_word { /** - * @depth: Number of bits being used in @word/@cleared - */ - unsigned long depth; - - /** * @word: word holding free bits */ - unsigned long word ____cacheline_aligned_in_smp; + unsigned long word; /** * @cleared: word holding cleared bits @@ -140,7 +135,7 @@ struct sbitmap_queue { /** * @min_shallow_depth: The minimum shallow depth which may be passed to - * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). + * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; }; @@ -164,6 +159,14 @@ struct sbitmap_queue { int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); +/* sbitmap internal helper */ +static inline unsigned int __map_depth(const struct sbitmap *sb, int index) +{ + if (index == sb->map_nr - 1) + return sb->depth - (index << sb->shift); + return 1U << sb->shift; +} + /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. @@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, - sb->map[index].depth - nr, + __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; @@ -460,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** - * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. @@ -472,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth); +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct @@ -496,32 +499,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, } /** - * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct - * sbitmap_queue, limiting the depth used from each word. - * @sbq: Bitmap queue to allocate from. - * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to - * sbitmap_queue_clear()). - * @shallow_depth: The maximum number of bits to allocate from a single word. - * See sbitmap_get_shallow(). - * - * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after - * initializing @sbq. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int *cpu, - unsigned int shallow_depth) -{ - int nr; - - *cpu = get_cpu(); - nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); - put_cpu(); - return nr; -} - -/** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. * @sbq: Bitmap queue in question. diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..4a6fdd2a679f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,10 +619,6 @@ struct sched_dl_entity { * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * - * @dl_boosted tells if we are boosted due to DI. If so we are - * outside bandwidth enforcement mechanism (but only until we - * exit the critical section); - * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * @@ -942,6 +938,9 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd_signal:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1091,6 +1090,9 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; +#ifdef CONFIG_RT_DELAYED_SIGNALS + struct kernel_siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; @@ -1485,6 +1487,9 @@ struct task_struct { #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif +#ifdef CONFIG_RETHOOK + struct llist_head rethooks; +#endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* @@ -1624,19 +1629,32 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int task_state_index(struct task_struct *tsk) +static inline unsigned int __task_state_index(unsigned int tsk_state, + unsigned int tsk_exit_state) { - unsigned int tsk_state = READ_ONCE(tsk->__state); - unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; + /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. + */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); } +static inline unsigned int task_state_index(struct task_struct *tsk) +{ + return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); +} + static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1684,7 +1702,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ @@ -1694,7 +1711,6 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ @@ -2020,7 +2036,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); -#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) DECLARE_STATIC_CALL(cond_resched, __cond_resched); @@ -2029,6 +2045,14 @@ static __always_inline int _cond_resched(void) return static_call_mod(cond_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +extern int dynamic_cond_resched(void); + +static __always_inline int _cond_resched(void) +{ + return dynamic_cond_resched(); +} + #else static inline int _cond_resched(void) diff --git a/include/linux/sched/affinity.h b/include/linux/sched/affinity.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/affinity.h @@ -0,0 +1 @@ +#include <linux/sched.h> diff --git a/include/linux/sched/cond_resched.h b/include/linux/sched/cond_resched.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/cond_resched.h @@ -0,0 +1 @@ +#include <linux/sched.h> diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1aff00b65f3c..7c83d4d5a971 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -6,6 +6,8 @@ * NORMAL/BATCH tasks. */ +#include <linux/sched.h> + #define MAX_DL_PRIO 0 static inline int dl_prio(int prio) diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index cc9f393e2a70..8c15abd67aed 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,54 +5,55 @@ #include <linux/init.h> #include <linux/tick.h> -enum hk_flags { - HK_FLAG_TIMER = 1, - HK_FLAG_RCU = (1 << 1), - HK_FLAG_MISC = (1 << 2), - HK_FLAG_SCHED = (1 << 3), - HK_FLAG_TICK = (1 << 4), - HK_FLAG_DOMAIN = (1 << 5), - HK_FLAG_WQ = (1 << 6), - HK_FLAG_MANAGED_IRQ = (1 << 7), - HK_FLAG_KTHREAD = (1 << 8), +enum hk_type { + HK_TYPE_TIMER, + HK_TYPE_RCU, + HK_TYPE_MISC, + HK_TYPE_SCHED, + HK_TYPE_TICK, + HK_TYPE_DOMAIN, + HK_TYPE_WQ, + HK_TYPE_MANAGED_IRQ, + HK_TYPE_KTHREAD, + HK_TYPE_MAX }; #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); -extern int housekeeping_any_cpu(enum hk_flags flags); -extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); -extern bool housekeeping_enabled(enum hk_flags flags); -extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); -extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); +extern int housekeeping_any_cpu(enum hk_type type); +extern const struct cpumask *housekeeping_cpumask(enum hk_type type); +extern bool housekeeping_enabled(enum hk_type type); +extern void housekeeping_affine(struct task_struct *t, enum hk_type type); +extern bool housekeeping_test_cpu(int cpu, enum hk_type type); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(enum hk_flags flags) +static inline int housekeeping_any_cpu(enum hk_type type) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) +static inline const struct cpumask *housekeeping_cpumask(enum hk_type type) { return cpu_possible_mask; } -static inline bool housekeeping_enabled(enum hk_flags flags) +static inline bool housekeeping_enabled(enum hk_type type) { return false; } static inline void housekeeping_affine(struct task_struct *t, - enum hk_flags flags) { } + enum hk_type type) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) +static inline bool housekeeping_cpu(int cpu, enum hk_type type) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overridden)) - return housekeeping_test_cpu(cpu, flags); + return housekeeping_test_cpu(cpu, type); #endif return true; } diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aa5f09ca5bcf..a80356e9dc69 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> +#include <linux/ioasid.h> /* * Routines for handling mm_structs @@ -433,4 +434,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/posix-timers.h b/include/linux/sched/posix-timers.h new file mode 100644 index 000000000000..523a381d6c88 --- /dev/null +++ b/include/linux/sched/posix-timers.h @@ -0,0 +1 @@ +#include <linux/posix-timers.h> diff --git a/include/linux/sched/rseq_api.h b/include/linux/sched/rseq_api.h new file mode 100644 index 000000000000..cf2af72693e1 --- /dev/null +++ b/include/linux/sched/rseq_api.h @@ -0,0 +1 @@ +#include <linux/rseq.h> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c19dd5a2c05c..c1076b5e17fb 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -23,6 +23,16 @@ enum sched_tunable_scaling { SCHED_TUNABLESCALING_END, }; +#define NUMA_BALANCING_DISABLED 0x0 +#define NUMA_BALANCING_NORMAL 0x1 +#define NUMA_BALANCING_MEMORY_TIERING 0x2 + +#ifdef CONFIG_NUMA_BALANCING +extern int sysctl_numa_balancing_mode; +#else +#define sysctl_numa_balancing_mode 0 +#endif + /* * control realtime throttling: * @@ -45,10 +55,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; -#endif - extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index b9198a1b3a84..e84e54d1b490 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p, - struct kernel_clone_args *kargs); +extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/include/linux/sched/task_flags.h b/include/linux/sched/task_flags.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/task_flags.h @@ -0,0 +1 @@ +#include <linux/sched.h> diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d10150587d81..892562ebbd3a 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk) static inline void put_task_stack(struct task_struct *tsk) {} #endif +void exit_task_stack_account(struct task_struct *tsk); + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) diff --git a/include/linux/sched/thread_info_api.h b/include/linux/sched/thread_info_api.h new file mode 100644 index 000000000000..2c60fbc16c08 --- /dev/null +++ b/include/linux/sched/thread_info_api.h @@ -0,0 +1 @@ +#include <linux/thread_info.h> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8054641c0a7b..56cffe42abbc 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -93,6 +93,7 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int imb_numa_nr; /* Nr running tasks that allows a NUMA imbalance */ int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 835ee87ed792..cb41c5edb4d4 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -5,6 +5,8 @@ #ifndef LINUX_SCHED_CLOCK #define LINUX_SCHED_CLOCK +#include <linux/types.h> + #ifdef CONFIG_GENERIC_SCHED_CLOCK /** * struct clock_read_data - data required to read from sched_clock() diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 80e781c51ddc..b87551f41f9f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -42,6 +42,7 @@ struct scmi_revision_info { struct scmi_clock_info { char name[SCMI_MAX_STR_SIZE]; + unsigned int enable_latency; bool rate_discrete; union { struct { @@ -82,6 +83,9 @@ struct scmi_clk_proto_ops { u64 rate); int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); + int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); + int (*disable_atomic)(const struct scmi_protocol_handle *ph, + u32 clk_id); }; /** @@ -612,6 +616,15 @@ struct scmi_notify_ops { * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol + * @is_transport_atomic: method to check if the underlying transport for this + * instance handle is configured to support atomic + * transactions for commands. + * Some users of the SCMI stack in the upper layers could + * be interested to know if they can assume SCMI + * command transactions associated to this handle will + * never sleep and act accordingly. + * An optional atomic threshold value could be returned + * where configured. * @notify_ops: pointer to set of notifications related operations */ struct scmi_handle { @@ -622,6 +635,8 @@ struct scmi_handle { (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, struct scmi_protocol_handle **ph); void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); + bool (*is_transport_atomic)(const struct scmi_handle *handle, + unsigned int *atomic_threshold); const struct scmi_notify_ops *notify_ops; }; diff --git a/include/linux/security.h b/include/linux/security.h index 6d72772182c8..25b3ef71f495 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/seqlock_api.h b/include/linux/seqlock_api.h new file mode 100644 index 000000000000..be91e7d3b826 --- /dev/null +++ b/include/linux/seqlock_api.h @@ -0,0 +1 @@ +#include <linux/seqlock.h> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index e65b80ed09e7..ab51d3cd39bd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -24,6 +24,7 @@ struct shmem_inode_info { struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ atomic_t stop_eviction; /* hold when working on inode */ + struct timespec64 i_crtime; /* file creation time */ struct inode vfs_inode; }; diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 1ac79bcee2bb..84aa448d8bb3 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,6 +47,8 @@ #define SZ_8G _AC(0x200000000, ULL) #define SZ_16G _AC(0x400000000, ULL) #define SZ_32G _AC(0x800000000, ULL) + +#define SZ_1T _AC(0x10000000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf11e1fbd69b..3a30cae8b0a5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,12 +314,136 @@ struct sk_buff; * used to translate the reason to string. */ enum skb_drop_reason { - SKB_DROP_REASON_NOT_SPECIFIED, - SKB_DROP_REASON_NO_SOCKET, - SKB_DROP_REASON_PKT_TOO_SMALL, - SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_TCP_FILTER, - SKB_DROP_REASON_UDP_CSUM, + SKB_NOT_DROPPED_YET = 0, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current + * host (interface is in promisc + * mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. + * see the document for rp_filter + * in ip-sysctl.rst for more + * information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as + * udp packet drop out of + * udp_memory_allocated. + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one + * expected, corresponding + * to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not + * expecting one, corresponding + * to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, + * corresponding to + * LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket + * backlog (see + * LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ + SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already + * received before (spurious retrans + * may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, + * the seq of the first byte exceed + * the right edges of receive + * window + */ + SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in + * the ofo queue, corresponding to + * LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by + * BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh + * entry + */ + SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh + * entry is full + */ + SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ + SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ + SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet + * outputting (failed to enqueue to + * current qdisc) + */ + SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to + * the per CPU backlog queue. This + * can be caused by backlog queue + * full (see netdev_max_backlog in + * net.rst) or RPS flow limit + */ + SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ + SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ + SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle + * the skb. For an etner packet, + * this means that L3 protocol is + * not supported + */ + SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation + * error + */ + SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from + * user space, e.g., via + * zerocopy_sg_from_iter() + * or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_DEV_HDR, /* device driver specific + * header/metadata is invalid + */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, + SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ SKB_DROP_REASON_MAX, }; @@ -557,6 +681,7 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; + unsigned int xdp_frags_size; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ @@ -720,6 +845,10 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required + * @mono_delivery_time: When set, skb->tstamp has the + * delivery_time in mono clock base (i.e. EDT). Otherwise, the + * skb->tstamp has the (rcv) timestamp at ingress and + * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking @@ -862,8 +991,12 @@ struct sk_buff { __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 csum_not_inet:1; __u8 dst_pending_confirm:1; + __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; + __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ +#endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -875,10 +1008,6 @@ struct sk_buff { __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; #endif -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; -#endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; @@ -890,6 +1019,7 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; + __u8 csum_not_inet:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -964,11 +1094,17 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present around you also must adapt these constants */ +/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time + * around, you also must adapt these constants. + */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_VLAN_PRESENT_BIT 7 +#define TC_AT_INGRESS_MASK (1 << 0) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else #define PKT_VLAN_PRESENT_BIT 0 +#define TC_AT_INGRESS_MASK (1 << 7) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif #define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) @@ -1115,10 +1251,16 @@ static inline void kfree_skb(struct sk_buff *skb) } void skb_release_head_state(struct sk_buff *skb); -void kfree_skb_list(struct sk_buff *segs); +void kfree_skb_list_reason(struct sk_buff *segs, + enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); +static inline void kfree_skb_list(struct sk_buff *segs) +{ + kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); +} + #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); #else @@ -1475,6 +1617,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1485,6 +1632,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ @@ -1724,19 +1876,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. + */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } @@ -3891,6 +4043,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); + skb->mono_delivery_time = 0; } static inline ktime_t net_timedelta(ktime_t t) @@ -3898,8 +4051,53 @@ static inline ktime_t net_timedelta(ktime_t t) return ktime_sub(ktime_get_real(), t); } -static inline ktime_t net_invalid_timestamp(void) +static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, + bool mono) +{ + skb->tstamp = kt; + skb->mono_delivery_time = kt && mono; +} + +DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); + +/* It is used in the ingress path to clear the delivery_time. + * If needed, set the skb->tstamp to the (rcv) timestamp. + */ +static inline void skb_clear_delivery_time(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) { + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = ktime_get_real(); + else + skb->tstamp = 0; + } +} + +static inline void skb_clear_tstamp(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return; + + skb->tstamp = 0; +} + +static inline ktime_t skb_tstamp(const struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return 0; + + return skb->tstamp; +} + +static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { + if (!skb->mono_delivery_time && skb->tstamp) + return skb->tstamp; + + if (static_branch_unlikely(&netstamp_needed_key) || cond) + return ktime_get_real(); + return 0; } @@ -4759,7 +4957,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) - skb->tstamp = 0; + skb_clear_tstamp(skb); #endif } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb..c5a2d6f50f25 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,7 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - unsigned long copy; + DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2); /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). The crypto APIs require the @@ -38,7 +38,6 @@ struct sk_msg_sg { */ struct scatterlist data[MAX_MSG_FRAGS + 2]; }; -static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -171,11 +170,6 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_next(msg, which) \ sk_msg_iter_var_next(msg->sg.which) -static inline void sk_msg_clear_meta(struct sk_msg *msg) -{ - memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); -} - static inline void sk_msg_init(struct sk_msg *msg) { BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); @@ -234,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (test_bit(msg->sg.start, &msg->sg.copy)) { + if (test_bit(msg->sg.start, msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -253,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - __set_bit(msg->sg.end, &msg->sg.copy); + __set_bit(msg->sg.end, msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -262,9 +256,9 @@ static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { if (copy_state) - __set_bit(i, &msg->sg.copy); + __set_bit(i, msg->sg.copy); else - __clear_bit(i, &msg->sg.copy); + __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; @@ -310,21 +304,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) -{ - if (msg->skb) - sock_drop(psock->sk, msg->skb); - kfree(msg); -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) list_add_tail(&msg->list, &psock->ingress_msg); - else - drop_sk_msg(psock, msg); + else { + sk_msg_free(psock->sk, msg); + kfree(msg); + } spin_unlock_bh(&psock->ingress_lock); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 37bde99b74af..373b3ef99f4e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -117,9 +117,6 @@ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ -/* Slab deactivation flag */ -#define SLAB_DEACTIVATED ((slab_flags_t __force)0x10000000U) - /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -135,6 +132,7 @@ #include <linux/kasan.h> +struct list_lru; struct mem_cgroup; /* * struct kmem_cache related prototypes @@ -416,6 +414,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size, void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; +void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); /* @@ -660,8 +660,7 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) - __alloc_size(1); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) diff --git a/include/linux/soc/ixp4xx/cpu.h b/include/linux/soc/ixp4xx/cpu.h index 88bd8de0e803..f526ac33afea 100644 --- a/include/linux/soc/ixp4xx/cpu.h +++ b/include/linux/soc/ixp4xx/cpu.h @@ -9,6 +9,7 @@ #define __SOC_IXP4XX_CPU_H__ #include <linux/io.h> +#include <linux/regmap.h> #ifdef CONFIG_ARM #include <asm/cputype.h> #endif @@ -23,6 +24,9 @@ #define IXP46X_PROCESSOR_ID_VALUE 0x69054200 /* including IXP455 */ #define IXP46X_PROCESSOR_ID_MASK 0xfffffff0 +/* Feature register in the expansion bus controller */ +#define IXP4XX_EXP_CNFG2 0x2c + /* "fuse" bits of IXP_EXP_CFG2 */ /* All IXP4xx CPUs */ #define IXP4XX_FEATURE_RCOMP (1 << 0) @@ -86,21 +90,31 @@ IXP43X_PROCESSOR_ID_VALUE) #define cpu_is_ixp46x() ((read_cpuid_id() & IXP46X_PROCESSOR_ID_MASK) == \ IXP46X_PROCESSOR_ID_VALUE) +static inline u32 cpu_ixp4xx_features(struct regmap *rmap) +{ + u32 val; -u32 ixp4xx_read_feature_bits(void); -void ixp4xx_write_feature_bits(u32 value); + regmap_read(rmap, IXP4XX_EXP_CNFG2, &val); + /* For some reason this register is inverted */ + val = ~val; + if (cpu_is_ixp42x_rev_a0()) + return IXP42X_FEATURE_MASK & ~(IXP4XX_FEATURE_RCOMP | + IXP4XX_FEATURE_AES); + if (cpu_is_ixp42x()) + return val & IXP42X_FEATURE_MASK; + if (cpu_is_ixp43x()) + return val & IXP43X_FEATURE_MASK; + return val & IXP46X_FEATURE_MASK; +} #else #define cpu_is_ixp42x_rev_a0() 0 #define cpu_is_ixp42x() 0 #define cpu_is_ixp43x() 0 #define cpu_is_ixp46x() 0 -static inline u32 ixp4xx_read_feature_bits(void) +static inline u32 cpu_ixp4xx_features(struct regmap *rmap) { return 0; } -static inline void ixp4xx_write_feature_bits(u32 value) -{ -} #endif #endif /* _ASM_ARCH_CPU_H */ diff --git a/include/linux/soc/ixp4xx/npe.h b/include/linux/soc/ixp4xx/npe.h index 2a91f465d456..9efeac777da1 100644 --- a/include/linux/soc/ixp4xx/npe.h +++ b/include/linux/soc/ixp4xx/npe.h @@ -3,6 +3,7 @@ #define __IXP4XX_NPE_H #include <linux/kernel.h> +#include <linux/regmap.h> extern const char *npe_names[]; @@ -17,6 +18,7 @@ struct npe_regs { struct npe { struct npe_regs __iomem *regs; + struct regmap *rmap; int id; int valid; }; diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 4615a228da51..50804ac748bd 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,88 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8195_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258 +#define MT8195_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8195_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8195_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8195_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8195_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8195_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8195_TOP_AXI_PROT_EN_MM_STA1 0x2ec +#define MT8195_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8195_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8195_TOP_AXI_PROT_EN_2_STA1 0x724 +#define MT8195_TOP_AXI_PROT_EN_VDNR_SET 0xb84 +#define MT8195_TOP_AXI_PROT_EN_VDNR_CLR 0xb88 +#define MT8195_TOP_AXI_PROT_EN_VDNR_STA1 0xb90 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_SET 0xba4 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_CLR 0xba8 +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_STA1 0xbb0 +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_SET 0xbb8 +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_CLR 0xbbc +#define MT8195_TOP_AXI_PROT_EN_VDNR_2_STA1 0xbc4 +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET 0xbcc +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR 0xbd0 +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA1 0xbd8 +#define MT8195_TOP_AXI_PROT_EN_MM_2_SET 0xdcc +#define MT8195_TOP_AXI_PROT_EN_MM_2_CLR 0xdd0 +#define MT8195_TOP_AXI_PROT_EN_MM_2_STA1 0xdd8 + +#define MT8195_TOP_AXI_PROT_EN_VDOSYS0 BIT(6) +#define MT8195_TOP_AXI_PROT_EN_VPPSYS0 BIT(10) +#define MT8195_TOP_AXI_PROT_EN_MFG1 BIT(11) +#define MT8195_TOP_AXI_PROT_EN_MFG1_2ND GENMASK(22, 21) +#define MT8195_TOP_AXI_PROT_EN_VPPSYS0_2ND BIT(23) +#define MT8195_TOP_AXI_PROT_EN_1_MFG1 GENMASK(20, 19) +#define MT8195_TOP_AXI_PROT_EN_1_CAM BIT(22) +#define MT8195_TOP_AXI_PROT_EN_2_CAM BIT(0) +#define MT8195_TOP_AXI_PROT_EN_2_MFG1_2ND GENMASK(6, 5) +#define MT8195_TOP_AXI_PROT_EN_2_MFG1 BIT(7) +#define MT8195_TOP_AXI_PROT_EN_2_AUDIO (BIT(9) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_2_ADSP (BIT(12) | GENMASK(16, 14)) +#define MT8195_TOP_AXI_PROT_EN_MM_CAM (BIT(0) | BIT(2) | BIT(4)) +#define MT8195_TOP_AXI_PROT_EN_MM_IPE BIT(1) +#define MT8195_TOP_AXI_PROT_EN_MM_IMG BIT(3) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS0 GENMASK(21, 17) +#define MT8195_TOP_AXI_PROT_EN_MM_VPPSYS1 GENMASK(8, 5) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC (BIT(9) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC_CORE1 (BIT(10) | BIT(12)) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC0 BIT(13) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC1 BIT(14) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS1_2ND BIT(22) +#define MT8195_TOP_AXI_PROT_EN_MM_VPPSYS1_2ND BIT(23) +#define MT8195_TOP_AXI_PROT_EN_MM_CAM_2ND BIT(24) +#define MT8195_TOP_AXI_PROT_EN_MM_IMG_2ND BIT(25) +#define MT8195_TOP_AXI_PROT_EN_MM_VENC_2ND BIT(26) +#define MT8195_TOP_AXI_PROT_EN_MM_WPESYS BIT(27) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC0_2ND BIT(28) +#define MT8195_TOP_AXI_PROT_EN_MM_VDEC1_2ND BIT(29) +#define MT8195_TOP_AXI_PROT_EN_MM_VDOSYS1 GENMASK(31, 30) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS0_2ND (GENMASK(1, 0) | BIT(4) | BIT(11)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VENC BIT(2) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VENC_CORE1 (BIT(3) | BIT(15)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_CAM (BIT(5) | BIT(17)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS1 (GENMASK(7, 6) | BIT(18)) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VPPSYS0 GENMASK(9, 8) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDOSYS1 BIT(10) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC2_2ND BIT(12) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC0_2ND BIT(13) +#define MT8195_TOP_AXI_PROT_EN_MM_2_WPESYS_2ND BIT(14) +#define MT8195_TOP_AXI_PROT_EN_MM_2_IPE BIT(16) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC2 BIT(21) +#define MT8195_TOP_AXI_PROT_EN_MM_2_VDEC0 BIT(22) +#define MT8195_TOP_AXI_PROT_EN_MM_2_WPESYS GENMASK(24, 23) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_EPD_TX BIT(1) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_DP_TX BIT(2) +#define MT8195_TOP_AXI_PROT_EN_VDNR_PCIE_MAC_P0 (BIT(11) | BIT(28)) +#define MT8195_TOP_AXI_PROT_EN_VDNR_PCIE_MAC_P1 (BIT(12) | BIT(29)) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_PCIE_MAC_P0 BIT(13) +#define MT8195_TOP_AXI_PROT_EN_VDNR_1_PCIE_MAC_P1 BIT(14) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1 (BIT(17) | BIT(19)) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VPPSYS0 BIT(20) +#define MT8195_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VDOSYS0 BIT(21) + #define MT8192_TOP_AXI_PROT_EN_STA1 0x228 #define MT8192_TOP_AXI_PROT_EN_1_STA1 0x258 #define MT8192_TOP_AXI_PROT_EN_SET 0x2a0 @@ -58,6 +140,54 @@ #define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) #define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) +#define MT8186_TOP_AXI_PROT_EN_SET (0x2A0) +#define MT8186_TOP_AXI_PROT_EN_CLR (0x2A4) +#define MT8186_TOP_AXI_PROT_EN_STA (0x228) +#define MT8186_TOP_AXI_PROT_EN_1_SET (0x2A8) +#define MT8186_TOP_AXI_PROT_EN_1_CLR (0x2AC) +#define MT8186_TOP_AXI_PROT_EN_1_STA (0x258) +#define MT8186_TOP_AXI_PROT_EN_2_SET (0x2B0) +#define MT8186_TOP_AXI_PROT_EN_2_CLR (0x2B4) +#define MT8186_TOP_AXI_PROT_EN_2_STA (0x26C) +#define MT8186_TOP_AXI_PROT_EN_3_SET (0x2B8) +#define MT8186_TOP_AXI_PROT_EN_3_CLR (0x2BC) +#define MT8186_TOP_AXI_PROT_EN_3_STA (0x2C8) + +/* MFG1 */ +#define MT8186_TOP_AXI_PROT_EN_1_MFG1_STEP1 (GENMASK(28, 27)) +#define MT8186_TOP_AXI_PROT_EN_MFG1_STEP2 (GENMASK(22, 21)) +#define MT8186_TOP_AXI_PROT_EN_MFG1_STEP3 (BIT(25)) +#define MT8186_TOP_AXI_PROT_EN_1_MFG1_STEP4 (BIT(29)) +/* DIS */ +#define MT8186_TOP_AXI_PROT_EN_1_DIS_STEP1 (GENMASK(12, 11)) +#define MT8186_TOP_AXI_PROT_EN_DIS_STEP2 (GENMASK(2, 1) | GENMASK(11, 10)) +/* IMG */ +#define MT8186_TOP_AXI_PROT_EN_1_IMG_STEP1 (BIT(23)) +#define MT8186_TOP_AXI_PROT_EN_1_IMG_STEP2 (BIT(15)) +/* IPE */ +#define MT8186_TOP_AXI_PROT_EN_1_IPE_STEP1 (BIT(24)) +#define MT8186_TOP_AXI_PROT_EN_1_IPE_STEP2 (BIT(16)) +/* CAM */ +#define MT8186_TOP_AXI_PROT_EN_1_CAM_STEP1 (GENMASK(22, 21)) +#define MT8186_TOP_AXI_PROT_EN_1_CAM_STEP2 (GENMASK(14, 13)) +/* VENC */ +#define MT8186_TOP_AXI_PROT_EN_1_VENC_STEP1 (BIT(31)) +#define MT8186_TOP_AXI_PROT_EN_1_VENC_STEP2 (BIT(19)) +/* VDEC */ +#define MT8186_TOP_AXI_PROT_EN_1_VDEC_STEP1 (BIT(30)) +#define MT8186_TOP_AXI_PROT_EN_1_VDEC_STEP2 (BIT(17)) +/* WPE */ +#define MT8186_TOP_AXI_PROT_EN_2_WPE_STEP1 (BIT(17)) +#define MT8186_TOP_AXI_PROT_EN_2_WPE_STEP2 (BIT(16)) +/* CONN_ON */ +#define MT8186_TOP_AXI_PROT_EN_1_CONN_ON_STEP1 (BIT(18)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP2 (BIT(14)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP3 (BIT(13)) +#define MT8186_TOP_AXI_PROT_EN_CONN_ON_STEP4 (BIT(16)) +/* ADSP_TOP */ +#define MT8186_TOP_AXI_PROT_EN_3_ADSP_TOP_STEP1 (GENMASK(12, 11)) +#define MT8186_TOP_AXI_PROT_EN_3_ADSP_TOP_STEP2 (GENMASK(1, 0)) + #define MT8183_TOP_AXI_PROT_EN_STA1 0x228 #define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 #define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 @@ -147,6 +277,9 @@ #define INFRA_TOPAXI_PROTECTEN_SET 0x0260 #define INFRA_TOPAXI_PROTECTEN_CLR 0x0264 +#define MT8192_INFRA_CTRL 0x290 +#define MT8192_INFRA_CTRL_DISABLE_MFG2ACP BIT(9) + #define REG_INFRA_MISC 0xf00 #define F_DDR_4GB_SUPPORT_EN BIT(13) diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e8fd92c96b7..0bc21ee58fac 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -35,7 +35,12 @@ #define LLCC_WRCACHE 31 #define LLCC_CVPFW 32 #define LLCC_CPUSS1 33 +#define LLCC_CAMEXP0 34 +#define LLCC_CPUMTE 35 #define LLCC_CPUHWT 36 +#define LLCC_MDMCLAD2 37 +#define LLCC_CAMEXP1 38 +#define LLCC_AENPU 45 /** * struct llcc_slice_desc - Cache slice descriptor @@ -83,7 +88,7 @@ struct llcc_edac_reg_data { * @bitmap: Bit map to track the active slice ids * @offsets: Pointer to the bank offsets array * @ecc_irq: interrupt for llcc cache error detection and reporting - * @major_version: Indicates the LLCC major version + * @version: Indicates the LLCC version */ struct llcc_drv_data { struct regmap *regmap; @@ -96,7 +101,7 @@ struct llcc_drv_data { unsigned long *bitmap; u32 *offsets; int ecc_irq; - u32 major_version; + u32 version; }; #if IS_ENABLED(CONFIG_QCOM_LLCC) diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index afd47217996b..9e8e60421192 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -10,10 +10,14 @@ struct device; struct firmware; +struct qcom_scm_pas_metadata; #if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) ssize_t qcom_mdt_get_size(const struct firmware *fw); +int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, phys_addr_t mem_phys, + struct qcom_scm_pas_metadata *pas_metadata_ctx); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, size_t mem_size, @@ -23,7 +27,8 @@ int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); -void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len); +void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len, + const char *fw_name, struct device *dev); #else /* !IS_ENABLED(CONFIG_QCOM_MDT_LOADER) */ @@ -32,6 +37,13 @@ static inline ssize_t qcom_mdt_get_size(const struct firmware *fw) return -ENODEV; } +static inline int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, phys_addr_t mem_phys, + struct qcom_scm_pas_metadata *pas_metadata_ctx) +{ + return -ENODEV; +} + static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, @@ -51,7 +63,8 @@ static inline int qcom_mdt_load_no_init(struct device *dev, } static inline void *qcom_mdt_read_metadata(const struct firmware *fw, - size_t *data_len) + size_t *data_len, const char *fw_name, + struct device *dev) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 0aad7009b50e..bd0d11af76c5 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, static inline struct ti_sci_resource * devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, - u32 dev_id, u32 sub_type); + u32 dev_id, u32 sub_type) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/socket.h b/include/linux/socket.h index 8ef26d89ef49..6f85f5d957ef 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -366,6 +366,7 @@ struct ucred { #define SOL_XDP 283 #define SOL_MPTCP 284 #define SOL_MCTP 285 +#define SOL_SMC 286 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/linux/softirq.h b/include/linux/softirq.h new file mode 100644 index 000000000000..c73d7dcb4cb5 --- /dev/null +++ b/include/linux/softirq.h @@ -0,0 +1 @@ +#include <linux/interrupt.h> diff --git a/include/linux/sort.h b/include/linux/sort.h index b5898725fe9d..e163287ac6c1 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -6,7 +6,7 @@ void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, - swap_func_t swap_func, + swap_r_func_t swap_func, const void *priv); void sort(void *base, size_t num, size_t size, diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index ca74dce36706..4658e7801b42 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -42,7 +42,6 @@ struct pxa2xx_spi_chip { u8 rx_threshold; u8 dma_burst_size; u32 timeout; - int gpio_cs; }; #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h index 440a71593162..9b8bb22d5b0c 100644 --- a/include/linux/spi/s3c24xx.h +++ b/include/linux/spi/s3c24xx.h @@ -10,14 +10,9 @@ #define __LINUX_SPI_S3C24XX_H __FILE__ struct s3c2410_spi_info { - int pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ int bus_num; /* bus number to use. */ - unsigned int use_fiq:1; /* use fiq */ - - void (*gpio_setup)(struct s3c2410_spi_info *spi, int enable); - void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7ab3fed7b804..7d005fa4631c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -16,6 +16,7 @@ #include <linux/gpio/consumer.h> #include <uapi/linux/spi/spi.h> +#include <linux/acpi.h> struct dma_chan; struct software_node; @@ -136,9 +137,6 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. - * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when - * not using a GPIO line) use cs_gpiod in new drivers by opting in on - * the spi_master. * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) * @word_delay: delay to be inserted between consecutive @@ -185,7 +183,6 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - int cs_gpio; /* LEGACY: chip select gpio */ struct gpio_desc *cs_gpiod; /* chip select gpio desc */ struct spi_delay word_delay; /* inter-word delay */ /* CS delays */ @@ -280,7 +277,7 @@ struct spi_message; struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); - int (*remove)(struct spi_device *spi); + void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; @@ -373,7 +370,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message * @cur_msg_mapped: message has been mapped for DMA - * @last_cs_enable: was enable true on the last call to set_cs. + * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip + * selected * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy @@ -417,17 +415,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller - * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per - * CS number. Any individual value may be -ENOENT for CS lines that - * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods - * in new drivers. * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab - * GPIO descriptors rather than using global GPIO numbers grabbed by the - * driver. This will fill in @cs_gpiods and @cs_gpios should not be used, - * and SPI devices will have the cs_gpiod assigned rather than cs_gpio. + * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have + * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. @@ -611,7 +604,7 @@ struct spi_controller { bool auto_runtime_pm; bool cur_msg_prepared; bool cur_msg_mapped; - bool last_cs_enable; + char last_cs; bool last_cs_mode_high; bool fallback; struct completion xfer_completion; @@ -641,7 +634,6 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; /* gpio chip select */ - int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; @@ -759,6 +751,13 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); +#if IS_ENABLED(CONFIG_ACPI) +extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index); +int acpi_spi_count_resources(struct acpi_device *adev); +#endif + /* * SPI resource management while processing a SPI message */ @@ -1452,8 +1451,20 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). + * + * You can also use spi_alloc_device() and spi_add_device() to use a two + * stage registration sequence for each spi_device. This gives the caller + * some more control over the spi_device structure before it is registered, + * but requires that caller to initialize fields that would otherwise + * be defined using the board info. */ extern struct spi_device * +spi_alloc_device(struct spi_controller *ctlr); + +extern int +spi_add_device(struct spi_device *spi); + +extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); diff --git a/include/linux/spinlock_api.h b/include/linux/spinlock_api.h new file mode 100644 index 000000000000..6338b27f98df --- /dev/null +++ b/include/linux/spinlock_api.h @@ -0,0 +1 @@ +#include <linux/spinlock.h> diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 15ba0df1ee0d..28c145a51e57 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -95,7 +95,7 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) struct ssb_gige *dev = pdev_to_ssb_gige(pdev); if (dev) return (dev->dev->bus->chip_id == 0x4785); - return 0; + return false; } /* Get the device MAC address */ diff --git a/include/linux/stddef.h b/include/linux/stddef.h index ca507bd5f808..929d67710cc5 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -13,11 +13,7 @@ enum { }; #undef offsetof -#ifdef __compiler_offsetof -#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) -#else -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) /** * sizeof_field() - Report the size of a struct field in bytes diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 7a22921c9db7..4d72258d42fd 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -106,4 +106,24 @@ void kfree_strarray(char **array, size_t n); char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); +static inline const char *str_yes_no(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *str_on_off(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *str_enable_disable(bool v) +{ + return v ? "enable" : "disable"; +} + +static inline const char *str_enabled_disabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f35c22b3355f..a5dda4987e8b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -52,24 +52,6 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; -struct svc_serv; - -struct svc_serv_ops { - /* Callback to use when last thread exits. */ - void (*svo_shutdown)(struct svc_serv *, struct net *); - - /* function for service threads to run */ - int (*svo_function)(void *); - - /* queue up a transport for servicing */ - void (*svo_enqueue_xprt)(struct svc_xprt *); - - /* optional module to count when adding threads. - * Thread function must call module_put_and_kthread_exit() to exit. - */ - struct module *svo_module; -}; - /* * RPC service. * @@ -102,7 +84,8 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - const struct svc_serv_ops *sv_ops; /* server operations */ + int (*sv_threadfn)(void *data); + #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -503,7 +486,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, @@ -511,10 +494,9 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - const struct svc_serv_ops *); + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 571f605bc91e..d42a75b3be10 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -28,6 +28,7 @@ struct svc_xprt_ops { void (*xpo_free)(struct svc_xprt *); void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); + void (*xpo_start_tls)(struct svc_xprt *); }; struct svc_xprt_class { @@ -88,6 +89,7 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + netns_tracker ns_tracker; const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ @@ -127,15 +129,16 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int, - const struct cred *); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred *cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); void svc_xprt_received(struct svc_xprt *xprt); -void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b519609af1d0..4417f667c757 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b..3cdc8d878d81 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -284,6 +284,7 @@ struct rpc_xprt { } stat; struct net *xprt_net; + netns_tracker ns_tracker; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5785d909c321..300273ff40cc 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -430,15 +430,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -506,14 +497,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); diff --git a/include/linux/swait_api.h b/include/linux/swait_api.h new file mode 100644 index 000000000000..1eeaaaaa5ea7 --- /dev/null +++ b/include/linux/swait_api.h @@ -0,0 +1 @@ +#include <linux/swait.h> diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d..27093b477c5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -328,15 +328,18 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); -void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); +void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); +extern struct list_lru shadow_nodes; #define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ + if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ xas_set_update(xas, workingset_update_node); \ + xas_set_lru(xas, &shadow_nodes); \ + } \ } while (0) /* linux/mm/page_alloc.c */ @@ -372,7 +375,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); @@ -384,7 +386,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, @@ -395,7 +396,7 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -extern int remove_mapping(struct address_space *mapping, struct page *page); +long remove_mapping(struct address_space *mapping, struct folio *folio); extern unsigned long reclaim_pages(struct list_head *page_list); #ifdef CONFIG_NUMA @@ -427,7 +428,7 @@ extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func); -extern int swap_set_page_dirty(struct page *page); +bool swap_dirty_folio(struct address_space *mapping, struct folio *folio); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); @@ -514,7 +515,6 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern bool reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); @@ -680,9 +680,6 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page) \ - (page_trans_huge_mapcount(page) == 1) - static inline int try_to_free_swap(struct page *page) { return 0; @@ -741,7 +738,7 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) #endif #ifdef CONFIG_MEMCG_SWAP -extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { @@ -761,7 +758,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else -static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 819c0cb00b6d..a34b0f9a9972 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -290,10 +290,6 @@ static inline void addr_limit_user_check(void) return; #endif - if (CHECK_DATA_CORRUPTION(uaccess_kernel(), - "Invalid address limit on user-mode return")) - force_sig(SIGKILL); - #ifdef TIF_FSCHECK clear_thread_flag(TIF_FSCHECK); #endif diff --git a/include/linux/syscalls_api.h b/include/linux/syscalls_api.h new file mode 100644 index 000000000000..23e012b04db4 --- /dev/null +++ b/include/linux/syscalls_api.h @@ -0,0 +1 @@ +#include <linux/syscalls.h> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 180adf7da785..6353d6db69b2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -265,7 +265,7 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * return NULL; } -static inline struct sysctl_header *register_sysctl_mount_point(const char *path) +static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) { return NULL; } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 78b91bb92f0d..1168302b7927 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -394,6 +394,7 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) + bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ #endif diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 5e1533ee3785..911cad324acc 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2022 Linaro Limited */ #ifndef __TEE_DRV_H @@ -20,14 +20,11 @@ * specific TEE driver. */ -#define TEE_SHM_MAPPED BIT(0) /* Memory mapped by the kernel */ -#define TEE_SHM_DMA_BUF BIT(1) /* Memory with dma-buf handle */ -#define TEE_SHM_EXT_DMA_BUF BIT(2) /* Memory with dma-buf handle */ -#define TEE_SHM_REGISTER BIT(3) /* Memory registered in secure world */ -#define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ -#define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ -#define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ -#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ +#define TEE_SHM_DYNAMIC BIT(0) /* Dynamic shared memory registered */ + /* in secure world */ +#define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ +#define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ +#define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ struct device; struct tee_device; @@ -221,92 +218,39 @@ struct tee_shm { }; /** - * struct tee_shm_pool_mgr - shared memory manager + * struct tee_shm_pool - shared memory pool * @ops: operations * @private_data: private data for the shared memory manager */ -struct tee_shm_pool_mgr { - const struct tee_shm_pool_mgr_ops *ops; +struct tee_shm_pool { + const struct tee_shm_pool_ops *ops; void *private_data; }; /** - * struct tee_shm_pool_mgr_ops - shared memory pool manager operations + * struct tee_shm_pool_ops - shared memory pool operations * @alloc: called when allocating shared memory * @free: called when freeing shared memory - * @destroy_poolmgr: called when destroying the pool manager + * @destroy_pool: called when destroying the pool */ -struct tee_shm_pool_mgr_ops { - int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm, - size_t size); - void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm); - void (*destroy_poolmgr)(struct tee_shm_pool_mgr *poolmgr); +struct tee_shm_pool_ops { + int (*alloc)(struct tee_shm_pool *pool, struct tee_shm *shm, + size_t size, size_t align); + void (*free)(struct tee_shm_pool *pool, struct tee_shm *shm); + void (*destroy_pool)(struct tee_shm_pool *pool); }; -/** - * tee_shm_pool_alloc() - Create a shared memory pool from shm managers - * @priv_mgr: manager for driver private shared memory allocations - * @dmabuf_mgr: manager for dma-buf shared memory allocations - * - * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied - * in @dmabuf, others will use the range provided by @priv. - * - * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. - */ -struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr, - struct tee_shm_pool_mgr *dmabuf_mgr); - /* - * tee_shm_pool_mgr_alloc_res_mem() - Create a shm manager for reserved - * memory + * tee_shm_pool_alloc_res_mem() - Create a shm manager for reserved memory * @vaddr: Virtual address of start of pool * @paddr: Physical address of start of pool * @size: Size in bytes of the pool * - * @returns pointer to a 'struct tee_shm_pool_mgr' or an ERR_PTR on failure. - */ -struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr, - phys_addr_t paddr, - size_t size, - int min_alloc_order); - -/** - * tee_shm_pool_mgr_destroy() - Free a shared memory manager - */ -static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm) -{ - poolm->ops->destroy_poolmgr(poolm); -} - -/** - * struct tee_shm_pool_mem_info - holds information needed to create a shared - * memory pool - * @vaddr: Virtual address of start of pool - * @paddr: Physical address of start of pool - * @size: Size in bytes of the pool - */ -struct tee_shm_pool_mem_info { - unsigned long vaddr; - phys_addr_t paddr; - size_t size; -}; - -/** - * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved - * memory range - * @priv_info: Information for driver private shared memory pool - * @dmabuf_info: Information for dma-buf shared memory pool - * - * Start and end of pools will must be page aligned. - * - * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied - * in @dmabuf, others will use the range provided by @priv. - * * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. */ -struct tee_shm_pool * -tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, - struct tee_shm_pool_mem_info *dmabuf_info); +struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, size_t size, + int min_alloc_order); /** * tee_shm_pool_free() - Free a shared memory pool @@ -315,7 +259,10 @@ tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, * The must be no remaining shared memory allocated from this pool when * this function is called. */ -void tee_shm_pool_free(struct tee_shm_pool *pool); +static inline void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + pool->ops->destroy_pool(pool); +} /** * tee_get_drvdata() - Return driver_data pointer @@ -323,43 +270,20 @@ void tee_shm_pool_free(struct tee_shm_pool *pool); */ void *tee_get_drvdata(struct tee_device *teedev); -/** - * tee_shm_alloc() - Allocate shared memory - * @ctx: Context that allocates the shared memory - * @size: Requested size of shared memory - * @flags: Flags setting properties for the requested shared memory. - * - * Memory allocated as global shared memory is automatically freed when the - * TEE file pointer is closed. The @flags field uses the bits defined by - * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If - * TEE_SHM_DMA_BUF global shared memory will be allocated and associated - * with a dma-buf handle, else driver private memory. - * - * @returns a pointer to 'struct tee_shm' - */ -struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); -/** - * tee_shm_register() - Register shared memory buffer - * @ctx: Context that registers the shared memory - * @addr: Address is userspace of the shared buffer - * @length: Length of the shared buffer - * @flags: Flags setting properties for the requested shared memory. - * - * @returns a pointer to 'struct tee_shm' - */ -struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, - size_t length, u32 flags); +struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, + void *addr, size_t length); /** - * tee_shm_is_registered() - Check if shared memory object in registered in TEE + * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind * @shm: Shared memory handle - * @returns true if object is registered in TEE + * @returns true if object is dynamic shared memory */ -static inline bool tee_shm_is_registered(struct tee_shm *shm) +static inline bool tee_shm_is_dynamic(struct tee_shm *shm) { - return shm && (shm->flags & TEE_SHM_REGISTER); + return shm && (shm->flags & TEE_SHM_DYNAMIC); } /** diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 73a6f34b3847..9f392ec76f2b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -209,9 +209,12 @@ __bad_copy_from(void); extern void __compiletime_error("copy destination size is too small") __bad_copy_to(void); +void __copy_overflow(int size, unsigned long count); + static inline void copy_overflow(int size, unsigned long count) { - WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); + if (IS_ENABLED(CONFIG_BUG)) + __copy_overflow(size, count); } static __always_inline __must_check bool diff --git a/include/linux/topology.h b/include/linux/topology.h index a6e201758ae9..f19bc3626297 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_ppin +#define topology_ppin(cpu) ((void)(cpu), 0ull) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..e6e95a9f07a5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,6 +15,7 @@ struct array_buffer; struct tracer; struct dentry; struct bpf_prog; +union bpf_attr; const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, @@ -315,6 +316,7 @@ enum { TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, TRACE_EVENT_FL_EPROBE_BIT, + TRACE_EVENT_FL_CUSTOM_BIT, }; /* @@ -328,6 +330,9 @@ enum { * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe * EPROBE - Event is an event probe + * CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint) + * This is set when the custom event has not been attached + * to a tracepoint yet, then it is cleared when it is. */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -339,6 +344,7 @@ enum { TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), + TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) @@ -440,7 +446,9 @@ static inline bool bpf_prog_array_valid(struct trace_event_call *call) static inline const char * trace_event_name(struct trace_event_call *call) { - if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + if (call->flags & TRACE_EVENT_FL_CUSTOM) + return call->name; + else if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; else return call->name; @@ -699,6 +707,8 @@ event_triggers_post_call(struct trace_event_file *file, bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); +bool __trace_trigger_soft_disabled(struct trace_event_file *file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -708,20 +718,20 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); * triggers that require testing the fields, it will return true, * otherwise false. */ -static inline bool +static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { - if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL, NULL); - if (eflags & EVENT_FILE_FL_SOFT_DISABLED) - return true; - if (eflags & EVENT_FILE_FL_PID_FILTER) - return trace_event_ignore_this_pid(file); - } - return false; + if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | + EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) + return false; + + return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS @@ -736,6 +746,7 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr); +int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -777,6 +788,11 @@ static inline int bpf_get_perf_event_info(const struct perf_event *event, { return -EOPNOTSUPP; } +static inline int +bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif enum { @@ -901,3 +917,18 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif #endif /* _LINUX_TRACE_EVENT_H */ + +/* + * Note: we keep the TRACE_CUSTOM_EVENT outside the include file ifdef protection. + * This is due to the way trace custom events work. If a file includes two + * trace event headers under one "CREATE_CUSTOM_TRACE_EVENTS" the first include + * will override the TRACE_CUSTOM_EVENT and break the second include. + */ + +#ifndef TRACE_CUSTOM_EVENT + +#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) +#define TRACE_CUSTOM_EVENT(name, proto, args, struct, assign, print) + +#endif /* ifdef TRACE_CUSTOM_EVENT (see note above) */ diff --git a/include/linux/types.h b/include/linux/types.h index ac825ad90e44..ea8cf60a8a79 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -226,6 +226,7 @@ struct callback_head { typedef void (*rcu_callback_t)(struct rcu_head *head); typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); +typedef void (*swap_r_func_t)(void *a, void *b, int size, const void *priv); typedef void (*swap_func_t)(void *a, void *b, int size); typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); diff --git a/include/linux/u64_stats_sync_api.h b/include/linux/u64_stats_sync_api.h new file mode 100644 index 000000000000..c72ca63da44b --- /dev/null +++ b/include/linux/u64_stats_sync_api.h @@ -0,0 +1 @@ +#include <linux/u64_stats_sync.h> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ac0394087f7d..546179418ffa 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -10,46 +10,6 @@ #include <asm/uaccess.h> -#ifdef CONFIG_SET_FS -/* - * Force the uaccess routines to be wired up for actual userspace access, - * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone - * using force_uaccess_end below. - */ -static inline mm_segment_t force_uaccess_begin(void) -{ - mm_segment_t fs = get_fs(); - - set_fs(USER_DS); - return fs; -} - -static inline void force_uaccess_end(mm_segment_t oldfs) -{ - set_fs(oldfs); -} -#else /* CONFIG_SET_FS */ -typedef struct { - /* empty dummy */ -} mm_segment_t; - -#ifndef TASK_SIZE_MAX -#define TASK_SIZE_MAX TASK_SIZE -#endif - -#define uaccess_kernel() (false) -#define user_addr_max() (TASK_SIZE_MAX) - -static inline mm_segment_t force_uaccess_begin(void) -{ - return (mm_segment_t) { }; -} - -static inline void force_uaccess_end(mm_segment_t oldfs) -{ -} -#endif /* CONFIG_SET_FS */ - /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and @@ -368,6 +328,25 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); +#ifndef __get_kernel_nofault +#define __get_kernel_nofault(dst, src, type, label) \ +do { \ + type __user *p = (type __force __user *)(src); \ + type data; \ + if (__get_user(data, p)) \ + goto label; \ + *(type *)dst = data; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, label) \ +do { \ + type __user *p = (type __force __user *)(dst); \ + type data = *(type *)src; \ + if (__put_user(data, p)) \ + goto label; \ +} while (0) +#endif + /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable @@ -401,8 +380,6 @@ static inline void user_access_restore(unsigned long flags) { } #endif #ifdef CONFIG_HARDENED_USERCOPY -void usercopy_warn(const char *name, const char *detail, bool to_user, - unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); diff --git a/include/linux/udp.h b/include/linux/udp.h index ae66dadd8543..254a2654400f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,11 +23,6 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } -static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_inner_transport_header(skb); -} - #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) diff --git a/include/linux/uio.h b/include/linux/uio.h index 1198a2bfc9bf..739285fe5a2f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -273,6 +273,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) i->count = count; } +static inline int +iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) +{ + size_t shorted = 0; + int npages; + + if (iov_iter_count(i) > max_bytes) { + shorted = iov_iter_count(i) - max_bytes; + iov_iter_truncate(i, max_bytes); + } + npages = iov_iter_npages(i, INT_MAX); + if (shorted) + iov_iter_reexpand(i, iov_iter_count(i) + shorted); + + return npages; +} + struct csum_state { __wsum csum; size_t off; diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 031f148ab373..b5deafd91f67 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -92,6 +92,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) static inline void usb_role_switch_put(struct usb_role_switch *sw) { } static inline struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc) { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2de442ececae..721089bb4c84 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -401,18 +401,24 @@ static inline int vdpa_reset(struct vdpa_device *vdev) return ret; } -static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features, bool locked) +static inline int vdpa_set_features_unlocked(struct vdpa_device *vdev, u64 features) { const struct vdpa_config_ops *ops = vdev->config; int ret; - if (!locked) - mutex_lock(&vdev->cf_mutex); - vdev->features_valid = true; ret = ops->set_driver_features(vdev, features); - if (!locked) - mutex_unlock(&vdev->cf_mutex); + + return ret; +} + +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + int ret; + + mutex_lock(&vdev->cf_mutex); + ret = vdpa_set_features_unlocked(vdev, features); + mutex_unlock(&vdev->cf_mutex); return ret; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 76191d7abed1..66dda06ec42d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -33,6 +33,7 @@ struct vfio_device { struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; + unsigned int migration_flags; /* Members below here are private, not for driver use */ refcount_t refcount; @@ -55,6 +56,17 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl + * @migration_set_state: Optional callback to change the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * The returned FD is used for data transfer according to the FSM + * definition. The driver is responsible to ensure that FD reaches end + * of stream or error whenever the migration FSM leaves a data transfer + * state or before close_device() returns. + * @migration_get_state: Optional callback to get the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. */ struct vfio_device_ops { char *name; @@ -69,8 +81,44 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*device_feature)(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz); + struct file *(*migration_set_state)( + struct vfio_device *device, + enum vfio_device_mig_state new_state); + int (*migration_get_state)(struct vfio_device *device, + enum vfio_device_mig_state *curr_state); }; +/** + * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl + * @flags: Arg from the device_feature op + * @argsz: Arg from the device_feature op + * @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver + * supports + * @minsz: Minimum data size the driver accepts + * + * For use in a driver's device_feature op. Checks that the inputs to the + * VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if + * the driver should execute the get or set, otherwise the relevant + * value should be returned. + */ +static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, + size_t minsz) +{ + if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) & + ~supported_ops) + return -EINVAL; + if (flags & VFIO_DEVICE_FEATURE_PROBE) + return 0; + /* Without PROBE one of GET or SET must be requested */ + if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET))) + return -EINVAL; + if (argsz < minsz) + return -EINVAL; + return 1; +} + void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); void vfio_uninit_group_dev(struct vfio_device *device); @@ -82,6 +130,11 @@ extern void vfio_device_put(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +int vfio_mig_get_next_state(struct vfio_device *device, + enum vfio_device_mig_state cur_fsm, + enum vfio_device_mig_state new_fsm, + enum vfio_device_mig_state *next_fsm); + /* * External user API */ diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index ef9a44b6cf5d..74a4a0f17b28 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -159,8 +159,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#ifdef CONFIG_VFIO_PCI_VGA extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#else +static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite) +{ + return -EINVAL; +} +#endif extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, uint64_t data, int count, int fd); @@ -220,6 +229,8 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); extern const struct pci_error_handlers vfio_pci_core_err_handlers; long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg); +int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz); ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos); ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, @@ -230,6 +241,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); +pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); static inline bool vfio_pci_is_vga(struct pci_dev *pdev) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 72292a62cd90..5464f398912a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); -int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4d107ad31149..dafdc7f48c01 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -64,8 +64,9 @@ struct virtio_shm_region { * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * This gives the final feature bits for the device: it can change + * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. + * Note: despite the name this can be called any number of times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 7b2363388bfa..16a0a4fd000b 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -129,6 +129,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, +#ifdef CONFIG_KSM + KSM_SWPIN_COPY, +#endif #endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 880227b9f044..3b1df7da402d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,17 +35,6 @@ struct notifier_block; /* in notifier.h */ #define VM_DEFER_KMEMLEAK 0 #endif -/* - * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. - * - * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after - * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poison shadow on free if it was never allocated. - * - * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to - * determine which allocations need the module shadow freed. - */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -80,8 +69,8 @@ struct vmap_area { /* * The following two variables can be packed, because * a vmap_area object can be either: - * 1) in "free" tree (root is vmap_area_root) - * 2) or "busy" tree (root is free_vmap_area_root) + * 1) in "free" tree (root is free_vmap_area_root) + * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ @@ -126,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size) } #endif +#ifndef arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return prot; +} +#endif + /* * Highlevel APIs for driver use */ @@ -159,6 +155,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); void *vmalloc_no_huge(unsigned long size) __alloc_size(1); +extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); +extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/include/linux/wait_api.h b/include/linux/wait_api.h new file mode 100644 index 000000000000..4e930548935a --- /dev/null +++ b/include/linux/wait_api.h @@ -0,0 +1 @@ +#include <linux/wait.h> diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdba..3b9a40ae8bdb 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ struct watch_type_filter filters[]; diff --git a/include/linux/workqueue_api.h b/include/linux/workqueue_api.h new file mode 100644 index 000000000000..77debb5d2760 --- /dev/null +++ b/include/linux/workqueue_api.h @@ -0,0 +1 @@ +#include <linux/workqueue.h> diff --git a/include/linux/wwan.h b/include/linux/wwan.h index afb3334ec8c5..5ce2acf444fb 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -174,11 +174,13 @@ void wwan_unregister_ops(struct device *parent); #ifdef CONFIG_WWAN_DEBUGFS struct dentry *wwan_get_debugfs_dir(struct device *parent); +void wwan_put_debugfs_dir(struct dentry *dir); #else static inline struct dentry *wwan_get_debugfs_dir(struct device *parent) { return ERR_PTR(-ENODEV); } +static inline void wwan_put_debugfs_dir(struct dentry *dir) {} #endif #endif /* __WWAN_H */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index d6d5da6ed735..bb52b786be1b 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1317,6 +1317,7 @@ struct xa_state { struct xa_node *xa_node; struct xa_node *xa_alloc; xa_update_node_t xa_update; + struct list_lru *xa_lru; }; /* @@ -1336,7 +1337,8 @@ struct xa_state { .xa_pad = 0, \ .xa_node = XAS_RESTART, \ .xa_alloc = NULL, \ - .xa_update = NULL \ + .xa_update = NULL, \ + .xa_lru = NULL, \ } /** @@ -1631,6 +1633,11 @@ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) xas->xa_update = update; } +static inline void xas_set_lru(struct xa_state *xas, struct list_lru *lru) +{ + xas->xa_lru = lru; +} + /** * xas_next_entry() - Advance iterator to next present entry. * @xas: XArray operation state. |