diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2024-07-16 00:03:44 +0300 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2024-07-16 00:03:44 +0300 |
| commit | a23e1966932464e1c5226cb9ac4ce1d5fc10ba22 (patch) | |
| tree | bf5f1b57faa01ca31656bfc48c7d6b6f0bc39189 /include/linux | |
| parent | 7c7b1be19b228b450c2945ec379d7fc6bfef9852 (diff) | |
| parent | f3efefb6fdcce604413135bd8d4c5568e53a1f13 (diff) | |
| download | linux-a23e1966932464e1c5226cb9ac4ce1d5fc10ba22.tar.xz | |
Merge branch 'next' into for-linus
Prepare input updates for 6.11 merge window.
Diffstat (limited to 'include/linux')
831 files changed, 30329 insertions, 12001 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 641dc4843987..34829f2c517a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,6 +15,7 @@ #include <linux/mod_devicetable.h> #include <linux/property.h> #include <linux/uuid.h> +#include <linux/node.h> struct irq_domain; struct irq_domain_ops; @@ -30,6 +31,7 @@ struct irq_domain_ops; #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> @@ -37,6 +39,16 @@ struct irq_domain_ops; #include <acpi/acpi_io.h> #include <asm/acpi.h> +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; @@ -119,21 +131,8 @@ enum acpi_address_range_id { /* Table Handlers */ -union acpi_subtable_headers { - struct acpi_subtable_header common; - struct acpi_hmat_structure hmat; - struct acpi_prmt_module_header prmt; - struct acpi_cedt_header cedt; -}; - typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, - const unsigned long end); - -typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, - void *arg, const unsigned long end); - /* Debugger support */ struct acpi_debugger_ops { @@ -207,14 +206,6 @@ static inline int acpi_debugger_notify_command_complete(void) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) -struct acpi_subtable_proc { - int id; - acpi_tbl_entry_handler handler; - acpi_tbl_entry_handler_arg handler_arg; - void *arg; - int count; -}; - void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); @@ -229,16 +220,6 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); -#ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) -#define __init_or_acpilib -#define __initdata_or_acpilib -#else -#define EXPORT_SYMBOL_ACPI_LIB(x) -#define __init_or_acpilib __init -#define __initdata_or_acpilib __initdata -#endif - int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, @@ -256,10 +237,15 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); +static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->flags & ACPI_MADT_ENABLED; +} + /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -439,6 +425,23 @@ extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); +#ifdef CONFIG_ACPI_THERMAL_LIB +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); +#endif + +#ifdef CONFIG_ACPI_HMAT +int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); +#else +static inline int acpi_get_genport_coordinates(u32 uid, + struct access_coordinate *coord) +{ + return -EOPNOTSUPP; +} +#endif + #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); @@ -477,8 +480,6 @@ static inline int acpi_get_node(acpi_handle handle) return 0; } #endif -extern int acpi_paddr_to_node(u64 start_addr, u64 size); - extern int pnpacpi_disabled; #define PXM_INVAL (-1) @@ -773,6 +774,10 @@ const char *acpi_get_subsystem_id(acpi_handle handle); #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) +/* Get rid of the -Wunused-variable for adev */ +#define acpi_dev_uid_match(adev, uid2) (adev && false) +#define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) + #include <acpi/acpi_numa.h> struct fwnode_handle; @@ -789,12 +794,6 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) struct acpi_device; -static inline bool -acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) -{ - return false; -} - static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) { return -ENODEV; @@ -1100,7 +1099,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#ifdef CONFIG_X86 +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); @@ -1109,15 +1108,14 @@ struct acpi_s2idle_dev_ops { }; int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); -#endif /* CONFIG_X86 */ -#ifndef CONFIG_IA64 -void arch_reserve_mem_area(acpi_physical_address addr, size_t size); -#else -static inline void arch_reserve_mem_area(acpi_physical_address addr, - size_t size) +int acpi_get_lps0_constraint(struct acpi_device *adev); +#else /* CONFIG_SUSPEND && CONFIG_X86 */ +static inline int acpi_get_lps0_constraint(struct device *dev) { + return ACPI_STATE_UNKNOWN; } -#endif /* CONFIG_X86 */ +#endif /* CONFIG_SUSPEND && CONFIG_X86 */ +void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif @@ -1172,6 +1170,7 @@ static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI +char *acpi_handle_path(acpi_handle handle); __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); @@ -1476,6 +1475,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); @@ -1535,4 +1543,30 @@ static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif +static inline void acpi_use_parent_companion(struct device *dev) +{ + ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); +} + +#ifdef CONFIG_ACPI_HMAT +int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, + enum access_coordinate_class access); +#else +static inline int hmat_update_target_coordinates(int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_ACPI_NUMA +bool acpi_node_backed_by_real_pxm(int nid); +#else +static inline bool acpi_node_backed_by_real_pxm(int nid) +{ + return false; +} +#endif + #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/acpi_amd_wbrf.h b/include/linux/acpi_amd_wbrf.h new file mode 100644 index 000000000000..898f31d536d4 --- /dev/null +++ b/include/linux/acpi_amd_wbrf.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Wifi Band Exclusion Interface (AMD ACPI Implementation) + * Copyright (C) 2023 Advanced Micro Devices + */ + +#ifndef _ACPI_AMD_WBRF_H +#define _ACPI_AMD_WBRF_H + +#include <linux/device.h> +#include <linux/notifier.h> + +/* The maximum number of frequency band ranges */ +#define MAX_NUM_OF_WBRF_RANGES 11 + +/* Record actions */ +#define WBRF_RECORD_ADD 0x0 +#define WBRF_RECORD_REMOVE 0x1 + +/** + * struct freq_band_range - Wifi frequency band range definition + * @start: start frequency point (in Hz) + * @end: end frequency point (in Hz) + */ +struct freq_band_range { + u64 start; + u64 end; +}; + +/** + * struct wbrf_ranges_in_out - wbrf ranges info + * @num_of_ranges: total number of band ranges in this struct + * @band_list: array of Wifi band ranges + */ +struct wbrf_ranges_in_out { + u64 num_of_ranges; + struct freq_band_range band_list[MAX_NUM_OF_WBRF_RANGES]; +}; + +/** + * enum wbrf_notifier_actions - wbrf notifier actions index + * @WBRF_CHANGED: there was some frequency band updates. The consumers + * should retrieve the latest active frequency bands. + */ +enum wbrf_notifier_actions { + WBRF_CHANGED, +}; + +#if IS_ENABLED(CONFIG_AMD_WBRF) +bool acpi_amd_wbrf_supported_producer(struct device *dev); +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in); +bool acpi_amd_wbrf_supported_consumer(struct device *dev); +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out); +int amd_wbrf_register_notifier(struct notifier_block *nb); +int amd_wbrf_unregister_notifier(struct notifier_block *nb); +#else +static inline +bool acpi_amd_wbrf_supported_consumer(struct device *dev) +{ + return false; +} + +static inline +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in) +{ + return -ENODEV; +} + +static inline +bool acpi_amd_wbrf_supported_producer(struct device *dev) +{ + return false; +} +static inline +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out) +{ + return -ENODEV; +} +static inline +int amd_wbrf_register_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +static inline +int amd_wbrf_unregister_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_WBRF */ + +#endif /* _ACPI_AMD_WBRF_H */ diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index ee7cb6aaff71..1cb65592c95d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -21,6 +21,7 @@ */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ #define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); diff --git a/include/linux/aer.h b/include/linux/aer.h index 3a3ab05e13fd..4b97f38f3fcf 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -18,11 +18,8 @@ struct pci_dev; -struct aer_header_log_regs { - unsigned int dw0; - unsigned int dw1; - unsigned int dw2; - unsigned int dw3; +struct pcie_tlp_log { + u32 dw[4]; }; struct aer_capability_regs { @@ -33,34 +30,27 @@ struct aer_capability_regs { u32 cor_status; u32 cor_mask; u32 cap_control; - struct aer_header_log_regs header_log; + struct pcie_tlp_log header_log; u32 root_command; u32 root_status; u16 cor_err_source; u16 uncor_err_source; }; +int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); + #if defined(CONFIG_PCIEAER) -/* PCIe port driver needs this function to enable AER */ -int pci_enable_pcie_error_reporting(struct pci_dev *dev); -int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else -static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} -static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) -{ - return -EINVAL; -} static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, +void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h deleted file mode 100644 index 421b0fa90d6a..000000000000 --- a/include/linux/amba/clcd-regs.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * David A Rusling - * - * Copyright (C) 2001 ARM Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -#ifndef AMBA_CLCD_REGS_H -#define AMBA_CLCD_REGS_H - -/* - * CLCD Controller Internal Register addresses - */ -#define CLCD_TIM0 0x00000000 -#define CLCD_TIM1 0x00000004 -#define CLCD_TIM2 0x00000008 -#define CLCD_TIM3 0x0000000c -#define CLCD_UBAS 0x00000010 -#define CLCD_LBAS 0x00000014 - -#define CLCD_PL110_IENB 0x00000018 -#define CLCD_PL110_CNTL 0x0000001c -#define CLCD_PL110_STAT 0x00000020 -#define CLCD_PL110_INTR 0x00000024 -#define CLCD_PL110_UCUR 0x00000028 -#define CLCD_PL110_LCUR 0x0000002C - -#define CLCD_PL111_CNTL 0x00000018 -#define CLCD_PL111_IENB 0x0000001c -#define CLCD_PL111_RIS 0x00000020 -#define CLCD_PL111_MIS 0x00000024 -#define CLCD_PL111_ICR 0x00000028 -#define CLCD_PL111_UCUR 0x0000002c -#define CLCD_PL111_LCUR 0x00000030 - -#define CLCD_PALL 0x00000200 -#define CLCD_PALETTE 0x00000200 - -#define TIM2_PCD_LO_MASK GENMASK(4, 0) -#define TIM2_PCD_LO_BITS 5 -#define TIM2_CLKSEL (1 << 5) -#define TIM2_ACB_MASK GENMASK(10, 6) -#define TIM2_IVS (1 << 11) -#define TIM2_IHS (1 << 12) -#define TIM2_IPC (1 << 13) -#define TIM2_IOE (1 << 14) -#define TIM2_BCD (1 << 26) -#define TIM2_PCD_HI_MASK GENMASK(31, 27) -#define TIM2_PCD_HI_BITS 5 -#define TIM2_PCD_HI_SHIFT 27 - -#define CNTL_LCDEN (1 << 0) -#define CNTL_LCDBPP1 (0 << 1) -#define CNTL_LCDBPP2 (1 << 1) -#define CNTL_LCDBPP4 (2 << 1) -#define CNTL_LCDBPP8 (3 << 1) -#define CNTL_LCDBPP16 (4 << 1) -#define CNTL_LCDBPP16_565 (6 << 1) -#define CNTL_LCDBPP16_444 (7 << 1) -#define CNTL_LCDBPP24 (5 << 1) -#define CNTL_LCDBW (1 << 4) -#define CNTL_LCDTFT (1 << 5) -#define CNTL_LCDMONO8 (1 << 6) -#define CNTL_LCDDUAL (1 << 7) -#define CNTL_BGR (1 << 8) -#define CNTL_BEBO (1 << 9) -#define CNTL_BEPO (1 << 10) -#define CNTL_LCDPWR (1 << 11) -#define CNTL_LCDVCOMP(x) ((x) << 12) -#define CNTL_LDMAFIFOTIME (1 << 15) -#define CNTL_WATERMARK (1 << 16) - -/* ST Microelectronics variant bits */ -#define CNTL_ST_1XBPP_444 0x0 -#define CNTL_ST_1XBPP_5551 (1 << 17) -#define CNTL_ST_1XBPP_565 (1 << 18) -#define CNTL_ST_CDWID_12 0x0 -#define CNTL_ST_CDWID_16 (1 << 19) -#define CNTL_ST_CDWID_18 (1 << 20) -#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) -#define CNTL_ST_CEAEN (1 << 21) -#define CNTL_ST_LCDBPP24_PACKED (6 << 1) - -#endif /* AMBA_CLCD_REGS_H */ diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h deleted file mode 100644 index b6e0cbeaf533..000000000000 --- a/include/linux/amba/clcd.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * linux/include/asm-arm/hardware/amba_clcd.h -- Integrator LCD panel. - * - * David A Rusling - * - * Copyright (C) 2001 ARM Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ -#include <linux/fb.h> -#include <linux/amba/clcd-regs.h> - -enum { - /* individual formats */ - CLCD_CAP_RGB444 = (1 << 0), - CLCD_CAP_RGB5551 = (1 << 1), - CLCD_CAP_RGB565 = (1 << 2), - CLCD_CAP_RGB888 = (1 << 3), - CLCD_CAP_BGR444 = (1 << 4), - CLCD_CAP_BGR5551 = (1 << 5), - CLCD_CAP_BGR565 = (1 << 6), - CLCD_CAP_BGR888 = (1 << 7), - - /* connection layouts */ - CLCD_CAP_444 = CLCD_CAP_RGB444 | CLCD_CAP_BGR444, - CLCD_CAP_5551 = CLCD_CAP_RGB5551 | CLCD_CAP_BGR5551, - CLCD_CAP_565 = CLCD_CAP_RGB565 | CLCD_CAP_BGR565, - CLCD_CAP_888 = CLCD_CAP_RGB888 | CLCD_CAP_BGR888, - - /* red/blue ordering */ - CLCD_CAP_RGB = CLCD_CAP_RGB444 | CLCD_CAP_RGB5551 | - CLCD_CAP_RGB565 | CLCD_CAP_RGB888, - CLCD_CAP_BGR = CLCD_CAP_BGR444 | CLCD_CAP_BGR5551 | - CLCD_CAP_BGR565 | CLCD_CAP_BGR888, - - CLCD_CAP_ALL = CLCD_CAP_BGR | CLCD_CAP_RGB, -}; - -struct backlight_device; - -struct clcd_panel { - struct fb_videomode mode; - signed short width; /* width in mm */ - signed short height; /* height in mm */ - u32 tim2; - u32 tim3; - u32 cntl; - u32 caps; - unsigned int bpp:8, - fixedtimings:1, - grayscale:1; - unsigned int connector; - struct backlight_device *backlight; - /* - * If the B/R lines are switched between the CLCD - * and the panel we need to know this and not try to - * compensate with the BGR bit in the control register. - */ - bool bgr_connection; -}; - -struct clcd_regs { - u32 tim0; - u32 tim1; - u32 tim2; - u32 tim3; - u32 cntl; - unsigned long pixclock; -}; - -struct clcd_fb; - -/* - * the board-type specific routines - */ -struct clcd_board { - const char *name; - - /* - * Optional. Hardware capability flags. - */ - u32 caps; - - /* - * Optional. Check whether the var structure is acceptable - * for this display. - */ - int (*check)(struct clcd_fb *fb, struct fb_var_screeninfo *var); - - /* - * Compulsory. Decode fb->fb.var into regs->*. In the case of - * fixed timing, set regs->* to the register values required. - */ - void (*decode)(struct clcd_fb *fb, struct clcd_regs *regs); - - /* - * Optional. Disable any extra display hardware. - */ - void (*disable)(struct clcd_fb *); - - /* - * Optional. Enable any extra display hardware. - */ - void (*enable)(struct clcd_fb *); - - /* - * Setup platform specific parts of CLCD driver - */ - int (*setup)(struct clcd_fb *); - - /* - * mmap the framebuffer memory - */ - int (*mmap)(struct clcd_fb *, struct vm_area_struct *); - - /* - * Remove platform specific parts of CLCD driver - */ - void (*remove)(struct clcd_fb *); -}; - -struct amba_device; -struct clk; - -/* this data structure describes each frame buffer device we find */ -struct clcd_fb { - struct fb_info fb; - struct amba_device *dev; - struct clk *clk; - struct clcd_panel *panel; - struct clcd_board *board; - void *board_data; - void __iomem *regs; - u16 off_ienb; - u16 off_cntl; - u32 clcd_cntl; - u32 cmap[16]; - bool clk_enabled; -}; - -static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) -{ - struct fb_var_screeninfo *var = &fb->fb.var; - u32 val, cpl; - - /* - * Program the CLCD controller registers and start the CLCD - */ - val = ((var->xres / 16) - 1) << 2; - val |= (var->hsync_len - 1) << 8; - val |= (var->right_margin - 1) << 16; - val |= (var->left_margin - 1) << 24; - regs->tim0 = val; - - val = var->yres; - if (fb->panel->cntl & CNTL_LCDDUAL) - val /= 2; - val -= 1; - val |= (var->vsync_len - 1) << 10; - val |= var->lower_margin << 16; - val |= var->upper_margin << 24; - regs->tim1 = val; - - val = fb->panel->tim2; - val |= var->sync & FB_SYNC_HOR_HIGH_ACT ? 0 : TIM2_IHS; - val |= var->sync & FB_SYNC_VERT_HIGH_ACT ? 0 : TIM2_IVS; - - cpl = var->xres_virtual; - if (fb->panel->cntl & CNTL_LCDTFT) /* TFT */ - /* / 1 */; - else if (!var->grayscale) /* STN color */ - cpl = cpl * 8 / 3; - else if (fb->panel->cntl & CNTL_LCDMONO8) /* STN monochrome, 8bit */ - cpl /= 8; - else /* STN monochrome, 4bit */ - cpl /= 4; - - regs->tim2 = val | ((cpl - 1) << 16); - - regs->tim3 = fb->panel->tim3; - - val = fb->panel->cntl; - if (var->grayscale) - val |= CNTL_LCDBW; - - if (fb->panel->caps && fb->board->caps && var->bits_per_pixel >= 16) { - /* - * if board and panel supply capabilities, we can support - * changing BGR/RGB depending on supplied parameters. Here - * we switch to what the framebuffer is providing if need - * be, so if the framebuffer is BGR but the display connection - * is RGB (first case) we switch it around. Vice versa mutatis - * mutandis if the framebuffer is RGB but the display connection - * is BGR, we flip it around. - */ - if (var->red.offset == 0) - val &= ~CNTL_BGR; - else - val |= CNTL_BGR; - if (fb->panel->bgr_connection) - val ^= CNTL_BGR; - } - - switch (var->bits_per_pixel) { - case 1: - val |= CNTL_LCDBPP1; - break; - case 2: - val |= CNTL_LCDBPP2; - break; - case 4: - val |= CNTL_LCDBPP4; - break; - case 8: - val |= CNTL_LCDBPP8; - break; - case 16: - /* - * PL110 cannot choose between 5551 and 565 modes in its - * control register. It is possible to use 565 with - * custom external wiring. - */ - if (amba_part(fb->dev) == 0x110 || - var->green.length == 5) - val |= CNTL_LCDBPP16; - else if (var->green.length == 6) - val |= CNTL_LCDBPP16_565; - else - val |= CNTL_LCDBPP16_444; - break; - case 32: - val |= CNTL_LCDBPP24; - break; - } - - regs->cntl = val; - regs->pixclock = var->pixclock; -} - -static inline int clcdfb_check(struct clcd_fb *fb, struct fb_var_screeninfo *var) -{ - var->xres_virtual = var->xres = (var->xres + 15) & ~15; - var->yres_virtual = var->yres = (var->yres + 1) & ~1; - -#define CHECK(e,l,h) (var->e < l || var->e > h) - if (CHECK(right_margin, (5+1), 256) || /* back porch */ - CHECK(left_margin, (5+1), 256) || /* front porch */ - CHECK(hsync_len, (5+1), 256) || - var->xres > 4096 || - var->lower_margin > 255 || /* back porch */ - var->upper_margin > 255 || /* front porch */ - var->vsync_len > 32 || - var->yres > 1024) - return -EINVAL; -#undef CHECK - - /* single panel mode: PCD = max(PCD, 1) */ - /* dual panel mode: PCD = max(PCD, 5) */ - - /* - * You can't change the grayscale setting, and - * we can only do non-interlaced video. - */ - if (var->grayscale != fb->fb.var.grayscale || - (var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED) - return -EINVAL; - -#define CHECK(e) (var->e != fb->fb.var.e) - if (fb->panel->fixedtimings && - (CHECK(xres) || - CHECK(yres) || - CHECK(bits_per_pixel) || - CHECK(pixclock) || - CHECK(left_margin) || - CHECK(right_margin) || - CHECK(upper_margin) || - CHECK(lower_margin) || - CHECK(hsync_len) || - CHECK(vsync_len) || - CHECK(sync))) - return -EINVAL; -#undef CHECK - - var->nonstd = 0; - var->accel_flags = 0; - - return 0; -} diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index 9bf58aac0df2..d7b07d0311e1 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -16,6 +16,7 @@ #ifndef _SSP_PL022_H #define _SSP_PL022_H +#include <linux/dmaengine.h> #include <linux/types.h> /** @@ -224,6 +225,7 @@ struct dma_chan; * struct pl022_ssp_master - device.platform_data for SPI controller devices. * @bus_id: identifier for this bus * @enable_dma: if true enables DMA driven transfers. + * @dma_filter: callback filter for dma_request_channel. * @dma_rx_param: parameter to locate an RX DMA channel. * @dma_tx_param: parameter to locate a TX DMA channel. * @autosuspend_delay: delay in ms following transfer completion before the @@ -235,7 +237,7 @@ struct dma_chan; struct pl022_ssp_controller { u16 bus_id; u8 enable_dma:1; - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; void *dma_rx_param; void *dma_tx_param; int autosuspend_delay; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index a1307b58cc2c..9120de05ead0 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -10,6 +10,11 @@ #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H #define ASM_ARM_HARDWARE_SERIAL_AMBA_H +#ifndef __ASSEMBLY__ +#include <linux/bitfield.h> +#include <linux/bits.h> +#endif + #include <linux/types.h> /* ------------------------------------------------------------------------------- @@ -70,141 +75,145 @@ #define ZX_UART011_ICR 0x4c #define ZX_UART011_DMACR 0x50 -#define UART011_DR_OE (1 << 11) -#define UART011_DR_BE (1 << 10) -#define UART011_DR_PE (1 << 9) -#define UART011_DR_FE (1 << 8) - -#define UART01x_RSR_OE 0x08 -#define UART01x_RSR_BE 0x04 -#define UART01x_RSR_PE 0x02 -#define UART01x_RSR_FE 0x01 - -#define UART011_FR_RI 0x100 -#define UART011_FR_TXFE 0x080 -#define UART011_FR_RXFF 0x040 -#define UART01x_FR_TXFF 0x020 -#define UART01x_FR_RXFE 0x010 -#define UART01x_FR_BUSY 0x008 -#define UART01x_FR_DCD 0x004 -#define UART01x_FR_DSR 0x002 -#define UART01x_FR_CTS 0x001 +#define UART011_DR_OE BIT(11) +#define UART011_DR_BE BIT(10) +#define UART011_DR_PE BIT(9) +#define UART011_DR_FE BIT(8) + +#define UART01x_RSR_OE BIT(3) +#define UART01x_RSR_BE BIT(2) +#define UART01x_RSR_PE BIT(1) +#define UART01x_RSR_FE BIT(0) + +#define UART011_FR_RI BIT(8) +#define UART011_FR_TXFE BIT(7) +#define UART011_FR_RXFF BIT(6) +#define UART01x_FR_TXFF (1 << 5) /* used in ASM */ +#define UART01x_FR_RXFE BIT(4) +#define UART01x_FR_BUSY (1 << 3) /* used in ASM */ +#define UART01x_FR_DCD BIT(2) +#define UART01x_FR_DSR BIT(1) +#define UART01x_FR_CTS BIT(0) #define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) /* * Some bits of Flag Register on ZTE device have different position from * standard ones. */ -#define ZX_UART01x_FR_BUSY 0x100 -#define ZX_UART01x_FR_DSR 0x008 -#define ZX_UART01x_FR_CTS 0x002 -#define ZX_UART011_FR_RI 0x001 - -#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ -#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */ -#define UART011_CR_OUT2 0x2000 /* OUT2 */ -#define UART011_CR_OUT1 0x1000 /* OUT1 */ -#define UART011_CR_RTS 0x0800 /* RTS */ -#define UART011_CR_DTR 0x0400 /* DTR */ -#define UART011_CR_RXE 0x0200 /* receive enable */ -#define UART011_CR_TXE 0x0100 /* transmit enable */ -#define UART011_CR_LBE 0x0080 /* loopback enable */ -#define UART010_CR_RTIE 0x0040 -#define UART010_CR_TIE 0x0020 -#define UART010_CR_RIE 0x0010 -#define UART010_CR_MSIE 0x0008 -#define ST_UART011_CR_OVSFACT 0x0008 /* Oversampling factor */ -#define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */ -#define UART01x_CR_SIREN 0x0002 /* SIR enable */ -#define UART01x_CR_UARTEN 0x0001 /* UART enable */ - -#define UART011_LCRH_SPS 0x80 +#define ZX_UART01x_FR_BUSY BIT(8) +#define ZX_UART01x_FR_DSR BIT(3) +#define ZX_UART01x_FR_CTS BIT(1) +#define ZX_UART011_FR_RI BIT(0) + +#define UART011_CR_CTSEN BIT(15) /* CTS hardware flow control */ +#define UART011_CR_RTSEN BIT(14) /* RTS hardware flow control */ +#define UART011_CR_OUT2 BIT(13) /* OUT2 */ +#define UART011_CR_OUT1 BIT(12) /* OUT1 */ +#define UART011_CR_RTS BIT(11) /* RTS */ +#define UART011_CR_DTR BIT(10) /* DTR */ +#define UART011_CR_RXE BIT(9) /* receive enable */ +#define UART011_CR_TXE BIT(8) /* transmit enable */ +#define UART011_CR_LBE BIT(7) /* loopback enable */ +#define UART010_CR_RTIE BIT(6) +#define UART010_CR_TIE BIT(5) +#define UART010_CR_RIE BIT(4) +#define UART010_CR_MSIE BIT(3) +#define ST_UART011_CR_OVSFACT BIT(3) /* Oversampling factor */ +#define UART01x_CR_IIRLP BIT(2) /* SIR low power mode */ +#define UART01x_CR_SIREN BIT(1) /* SIR enable */ +#define UART01x_CR_UARTEN BIT(0) /* UART enable */ + +#define UART011_LCRH_SPS BIT(7) #define UART01x_LCRH_WLEN_8 0x60 #define UART01x_LCRH_WLEN_7 0x40 #define UART01x_LCRH_WLEN_6 0x20 #define UART01x_LCRH_WLEN_5 0x00 -#define UART01x_LCRH_FEN 0x10 -#define UART01x_LCRH_STP2 0x08 -#define UART01x_LCRH_EPS 0x04 -#define UART01x_LCRH_PEN 0x02 -#define UART01x_LCRH_BRK 0x01 - -#define ST_UART011_DMAWM_RX_1 (0 << 3) -#define ST_UART011_DMAWM_RX_2 (1 << 3) -#define ST_UART011_DMAWM_RX_4 (2 << 3) -#define ST_UART011_DMAWM_RX_8 (3 << 3) -#define ST_UART011_DMAWM_RX_16 (4 << 3) -#define ST_UART011_DMAWM_RX_32 (5 << 3) -#define ST_UART011_DMAWM_RX_48 (6 << 3) -#define ST_UART011_DMAWM_TX_1 0 -#define ST_UART011_DMAWM_TX_2 1 -#define ST_UART011_DMAWM_TX_4 2 -#define ST_UART011_DMAWM_TX_8 3 -#define ST_UART011_DMAWM_TX_16 4 -#define ST_UART011_DMAWM_TX_32 5 -#define ST_UART011_DMAWM_TX_48 6 - -#define UART010_IIR_RTIS 0x08 -#define UART010_IIR_TIS 0x04 -#define UART010_IIR_RIS 0x02 -#define UART010_IIR_MIS 0x01 - -#define UART011_IFLS_RX1_8 (0 << 3) -#define UART011_IFLS_RX2_8 (1 << 3) -#define UART011_IFLS_RX4_8 (2 << 3) -#define UART011_IFLS_RX6_8 (3 << 3) -#define UART011_IFLS_RX7_8 (4 << 3) -#define UART011_IFLS_TX1_8 (0 << 0) -#define UART011_IFLS_TX2_8 (1 << 0) -#define UART011_IFLS_TX4_8 (2 << 0) -#define UART011_IFLS_TX6_8 (3 << 0) -#define UART011_IFLS_TX7_8 (4 << 0) +#define UART01x_LCRH_FEN BIT(4) +#define UART01x_LCRH_STP2 BIT(3) +#define UART01x_LCRH_EPS BIT(2) +#define UART01x_LCRH_PEN BIT(1) +#define UART01x_LCRH_BRK BIT(0) + +#define ST_UART011_DMAWM_RX GENMASK(5, 3) +#define ST_UART011_DMAWM_RX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 0) +#define ST_UART011_DMAWM_RX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 1) +#define ST_UART011_DMAWM_RX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 2) +#define ST_UART011_DMAWM_RX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 3) +#define ST_UART011_DMAWM_RX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 4) +#define ST_UART011_DMAWM_RX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 5) +#define ST_UART011_DMAWM_RX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 6) +#define ST_UART011_DMAWM_TX GENMASK(2, 0) +#define ST_UART011_DMAWM_TX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 0) +#define ST_UART011_DMAWM_TX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 1) +#define ST_UART011_DMAWM_TX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 2) +#define ST_UART011_DMAWM_TX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 3) +#define ST_UART011_DMAWM_TX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 4) +#define ST_UART011_DMAWM_TX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 5) +#define ST_UART011_DMAWM_TX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 6) + +#define UART010_IIR_RTIS BIT(3) +#define UART010_IIR_TIS BIT(2) +#define UART010_IIR_RIS BIT(1) +#define UART010_IIR_MIS BIT(0) + +#define UART011_IFLS_RXIFLSEL GENMASK(5, 3) +#define UART011_IFLS_RX1_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 0) +#define UART011_IFLS_RX2_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 1) +#define UART011_IFLS_RX4_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 2) +#define UART011_IFLS_RX6_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 3) +#define UART011_IFLS_RX7_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 4) +#define UART011_IFLS_TXIFLSEL GENMASK(2, 0) +#define UART011_IFLS_TX1_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 0) +#define UART011_IFLS_TX2_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 1) +#define UART011_IFLS_TX4_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 2) +#define UART011_IFLS_TX6_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 3) +#define UART011_IFLS_TX7_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 4) /* special values for ST vendor with deeper fifo */ -#define UART011_IFLS_RX_HALF (5 << 3) -#define UART011_IFLS_TX_HALF (5 << 0) - -#define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ -#define UART011_BEIM (1 << 9) /* break error interrupt mask */ -#define UART011_PEIM (1 << 8) /* parity error interrupt mask */ -#define UART011_FEIM (1 << 7) /* framing error interrupt mask */ -#define UART011_RTIM (1 << 6) /* receive timeout interrupt mask */ -#define UART011_TXIM (1 << 5) /* transmit interrupt mask */ -#define UART011_RXIM (1 << 4) /* receive interrupt mask */ -#define UART011_DSRMIM (1 << 3) /* DSR interrupt mask */ -#define UART011_DCDMIM (1 << 2) /* DCD interrupt mask */ -#define UART011_CTSMIM (1 << 1) /* CTS interrupt mask */ -#define UART011_RIMIM (1 << 0) /* RI interrupt mask */ - -#define UART011_OEIS (1 << 10) /* overrun error interrupt status */ -#define UART011_BEIS (1 << 9) /* break error interrupt status */ -#define UART011_PEIS (1 << 8) /* parity error interrupt status */ -#define UART011_FEIS (1 << 7) /* framing error interrupt status */ -#define UART011_RTIS (1 << 6) /* receive timeout interrupt status */ -#define UART011_TXIS (1 << 5) /* transmit interrupt status */ -#define UART011_RXIS (1 << 4) /* receive interrupt status */ -#define UART011_DSRMIS (1 << 3) /* DSR interrupt status */ -#define UART011_DCDMIS (1 << 2) /* DCD interrupt status */ -#define UART011_CTSMIS (1 << 1) /* CTS interrupt status */ -#define UART011_RIMIS (1 << 0) /* RI interrupt status */ - -#define UART011_OEIC (1 << 10) /* overrun error interrupt clear */ -#define UART011_BEIC (1 << 9) /* break error interrupt clear */ -#define UART011_PEIC (1 << 8) /* parity error interrupt clear */ -#define UART011_FEIC (1 << 7) /* framing error interrupt clear */ -#define UART011_RTIC (1 << 6) /* receive timeout interrupt clear */ -#define UART011_TXIC (1 << 5) /* transmit interrupt clear */ -#define UART011_RXIC (1 << 4) /* receive interrupt clear */ -#define UART011_DSRMIC (1 << 3) /* DSR interrupt clear */ -#define UART011_DCDMIC (1 << 2) /* DCD interrupt clear */ -#define UART011_CTSMIC (1 << 1) /* CTS interrupt clear */ -#define UART011_RIMIC (1 << 0) /* RI interrupt clear */ - -#define UART011_DMAONERR (1 << 2) /* disable dma on error */ -#define UART011_TXDMAE (1 << 1) /* enable transmit dma */ -#define UART011_RXDMAE (1 << 0) /* enable receive dma */ - -#define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) -#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) +#define UART011_IFLS_RX_HALF FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 5) +#define UART011_IFLS_TX_HALF FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 5) + +#define UART011_OEIM BIT(10) /* overrun error interrupt mask */ +#define UART011_BEIM BIT(9) /* break error interrupt mask */ +#define UART011_PEIM BIT(8) /* parity error interrupt mask */ +#define UART011_FEIM BIT(7) /* framing error interrupt mask */ +#define UART011_RTIM BIT(6) /* receive timeout interrupt mask */ +#define UART011_TXIM BIT(5) /* transmit interrupt mask */ +#define UART011_RXIM BIT(4) /* receive interrupt mask */ +#define UART011_DSRMIM BIT(3) /* DSR interrupt mask */ +#define UART011_DCDMIM BIT(2) /* DCD interrupt mask */ +#define UART011_CTSMIM BIT(1) /* CTS interrupt mask */ +#define UART011_RIMIM BIT(0) /* RI interrupt mask */ + +#define UART011_OEIS BIT(10) /* overrun error interrupt status */ +#define UART011_BEIS BIT(9) /* break error interrupt status */ +#define UART011_PEIS BIT(8) /* parity error interrupt status */ +#define UART011_FEIS BIT(7) /* framing error interrupt status */ +#define UART011_RTIS BIT(6) /* receive timeout interrupt status */ +#define UART011_TXIS BIT(5) /* transmit interrupt status */ +#define UART011_RXIS BIT(4) /* receive interrupt status */ +#define UART011_DSRMIS BIT(3) /* DSR interrupt status */ +#define UART011_DCDMIS BIT(2) /* DCD interrupt status */ +#define UART011_CTSMIS BIT(1) /* CTS interrupt status */ +#define UART011_RIMIS BIT(0) /* RI interrupt status */ + +#define UART011_OEIC BIT(10) /* overrun error interrupt clear */ +#define UART011_BEIC BIT(9) /* break error interrupt clear */ +#define UART011_PEIC BIT(8) /* parity error interrupt clear */ +#define UART011_FEIC BIT(7) /* framing error interrupt clear */ +#define UART011_RTIC BIT(6) /* receive timeout interrupt clear */ +#define UART011_TXIC BIT(5) /* transmit interrupt clear */ +#define UART011_RXIC BIT(4) /* receive interrupt clear */ +#define UART011_DSRMIC BIT(3) /* DSR interrupt clear */ +#define UART011_DCDMIC BIT(2) /* DCD interrupt clear */ +#define UART011_CTSMIC BIT(1) /* CTS interrupt clear */ +#define UART011_RIMIC BIT(0) /* RI interrupt clear */ + +#define UART011_DMAONERR BIT(2) /* disable dma on error */ +#define UART011_TXDMAE BIT(1) /* enable transmit dma */ +#define UART011_RXDMAE BIT(0) /* enable receive dma */ + +#define UART01x_RSR_ANY (UART01x_RSR_OE | UART01x_RSR_BE | UART01x_RSR_PE | UART01x_RSR_FE) +#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD | UART01x_FR_DSR | UART01x_FR_CTS) #ifndef __ASSEMBLY__ struct amba_device; /* in uncompress this is included but amba/bus.h is not */ @@ -220,8 +229,8 @@ struct amba_pl011_data { bool dma_rx_poll_enable; unsigned int dma_rx_poll_rate; unsigned int dma_rx_poll_timeout; - void (*init) (void); - void (*exit) (void); + void (*init)(void); + void (*exit)(void); }; #endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 953e6f12fa1c..2b90c48a6a87 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,128 +32,7 @@ struct task_struct; struct pci_dev; extern int amd_iommu_detect(void); -extern int amd_iommu_init_hardware(void); - -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. - */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } @@ -206,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); struct amd_iommu *get_amd_iommu(unsigned int idx); -#ifdef CONFIG_AMD_MEM_ENCRYPT -int amd_iommu_snp_enable(void); +#ifdef CONFIG_KVM_AMD_SEV +int amd_iommu_snp_disable(void); +#else +static inline int amd_iommu_snp_disable(void) { return 0; } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/include/linux/amd-pmf-io.h b/include/linux/amd-pmf-io.h new file mode 100644 index 000000000000..b4f818205216 --- /dev/null +++ b/include/linux/amd-pmf-io.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Platform Management Framework Interface + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Authors: Shyam Sundar S K <Shyam-sundar.S-k@amd.com> + * Basavaraj Natikar <Basavaraj.Natikar@amd.com> + */ + +#ifndef AMD_PMF_IO_H +#define AMD_PMF_IO_H + +#include <linux/types.h> + +/** + * enum sfh_message_type - Query the SFH message type + * @MT_HPD: Message ID to know the Human presence info from MP2 FW + * @MT_ALS: Message ID to know the Ambient light info from MP2 FW + */ +enum sfh_message_type { + MT_HPD, + MT_ALS, +}; + +/** + * enum sfh_hpd_info - Query the Human presence information + * @SFH_NOT_DETECTED: Check the HPD connection information from MP2 FW + * @SFH_USER_PRESENT: Check if the user is present from HPD sensor + * @SFH_USER_AWAY: Check if the user is away from HPD sensor + */ +enum sfh_hpd_info { + SFH_NOT_DETECTED, + SFH_USER_PRESENT, + SFH_USER_AWAY, +}; + +/** + * struct amd_sfh_info - get HPD sensor info from MP2 FW + * @ambient_light: Populates the ambient light information + * @user_present: Populates the user presence information + */ +struct amd_sfh_info { + u32 ambient_light; + u8 user_present; +}; + +int amd_get_sfh_info(struct amd_sfh_info *sfh_info, enum sfh_message_type op); +#endif diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 446394f84606..d21838835abd 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -39,11 +39,16 @@ struct amd_aperf_mperf { * @cppc_req_cached: cached performance request hints * @highest_perf: the maximum performance an individual processor may reach, * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. * @nominal_perf: the maximum sustained performance level of the processor, * assuming ideal operating conditions * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power * savings are achieved * @lowest_perf: the absolute lowest performance level of the processor + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. * @max_freq: the frequency that mapped to highest_perf * @min_freq: the frequency that mapped to lowest_perf * @nominal_freq: the frequency that mapped to nominal_perf @@ -52,6 +57,9 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -70,6 +78,11 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 prefcore_ranking; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; u32 max_freq; u32 min_freq; @@ -81,6 +94,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 5deaddbd7927..93a5f16d03f3 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -15,13 +15,13 @@ struct inode; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); -struct file *anon_inode_getfile_secure(const char *name, +struct file *anon_inode_create_getfile(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); -int anon_inode_getfd_secure(const char *name, +int anon_inode_create_getfd(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h deleted file mode 100644 index 720fbb70294a..000000000000 --- a/include/linux/apple-mailbox.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ -/* - * Apple mailbox message format - * - * Copyright (C) 2021 The Asahi Linux Contributors - */ - -#ifndef _LINUX_APPLE_MAILBOX_H_ -#define _LINUX_APPLE_MAILBOX_H_ - -#include <linux/types.h> - -/* encodes a single 96bit message sent over the single channel */ -struct apple_mbox_msg { - u64 msg0; - u32 msg1; -}; - -#endif diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index a07b510e7dc5..a63d61ca55af 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -27,6 +27,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu) void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); +DECLARE_PER_CPU(unsigned long, capacity_freq_ref); + +static inline unsigned long topology_get_freq_ref(int cpu) +{ + return per_cpu(capacity_freq_ref, cpu); +} + DECLARE_PER_CPU(unsigned long, arch_freq_scale); static inline unsigned long topology_get_freq_scale(int cpu) @@ -92,6 +99,7 @@ void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); +void freq_inv_set_max_ratio(int cpu, u64 max_rate); #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ diff --git a/include/linux/args.h b/include/linux/args.h new file mode 100644 index 000000000000..8ff60a54eb7d --- /dev/null +++ b/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index f196c19f8e55..083f85653716 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -5,6 +5,7 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H +#include <linux/args.h> #include <linux/init.h> #include <uapi/linux/const.h> @@ -66,6 +67,8 @@ #define ARM_SMCCC_VERSION_1_3 0x10003 #define ARM_SMCCC_1_3_SVE_HINT 0x10000 +#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT + #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ @@ -413,31 +416,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #endif -#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x - -#define __count_args(...) \ - ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __constraint_read_0 "r" (arg0) -#define __constraint_read_1 __constraint_read_0, "r" (arg1) -#define __constraint_read_2 __constraint_read_1, "r" (arg2) -#define __constraint_read_3 __constraint_read_2, "r" (arg3) -#define __constraint_read_4 __constraint_read_3, "r" (arg4) -#define __constraint_read_5 __constraint_read_4, "r" (arg5) -#define __constraint_read_6 __constraint_read_5, "r" (arg6) -#define __constraint_read_7 __constraint_read_6, "r" (arg7) +#define __constraint_read_2 "r" (arg0) +#define __constraint_read_3 __constraint_read_2, "r" (arg1) +#define __constraint_read_4 __constraint_read_3, "r" (arg2) +#define __constraint_read_5 __constraint_read_4, "r" (arg3) +#define __constraint_read_6 __constraint_read_5, "r" (arg4) +#define __constraint_read_7 __constraint_read_6, "r" (arg5) +#define __constraint_read_8 __constraint_read_7, "r" (arg6) +#define __constraint_read_9 __constraint_read_8, "r" (arg7) -#define __declare_arg_0(a0, res) \ +#define __declare_arg_2(a0, res) \ struct arm_smccc_res *___res = res; \ register unsigned long arg0 asm("r0") = (u32)a0 -#define __declare_arg_1(a0, a1, res) \ +#define __declare_arg_3(a0, a1, res) \ typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ register unsigned long arg0 asm("r0") = (u32)a0; \ register typeof(a1) arg1 asm("r1") = __a1 -#define __declare_arg_2(a0, a1, a2, res) \ +#define __declare_arg_4(a0, a1, a2, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ @@ -445,7 +443,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register typeof(a1) arg1 asm("r1") = __a1; \ register typeof(a2) arg2 asm("r2") = __a2 -#define __declare_arg_3(a0, a1, a2, a3, res) \ +#define __declare_arg_5(a0, a1, a2, a3, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ typeof(a3) __a3 = a3; \ @@ -455,34 +453,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register typeof(a2) arg2 asm("r2") = __a2; \ register typeof(a3) arg3 asm("r3") = __a3 -#define __declare_arg_4(a0, a1, a2, a3, a4, res) \ +#define __declare_arg_6(a0, a1, a2, a3, a4, res) \ typeof(a4) __a4 = a4; \ - __declare_arg_3(a0, a1, a2, a3, res); \ + __declare_arg_5(a0, a1, a2, a3, res); \ register typeof(a4) arg4 asm("r4") = __a4 -#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ +#define __declare_arg_7(a0, a1, a2, a3, a4, a5, res) \ typeof(a5) __a5 = a5; \ - __declare_arg_4(a0, a1, a2, a3, a4, res); \ + __declare_arg_6(a0, a1, a2, a3, a4, res); \ register typeof(a5) arg5 asm("r5") = __a5 -#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ +#define __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res) \ typeof(a6) __a6 = a6; \ - __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ + __declare_arg_7(a0, a1, a2, a3, a4, a5, res); \ register typeof(a6) arg6 asm("r6") = __a6 -#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ +#define __declare_arg_9(a0, a1, a2, a3, a4, a5, a6, a7, res) \ typeof(a7) __a7 = a7; \ - __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ + __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res); \ register typeof(a7) arg7 asm("r7") = __a7 -#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) -#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) - -#define ___constraints(count) \ - : __constraint_read_ ## count \ - : smccc_sve_clobbers "memory" -#define __constraints(count) ___constraints(count) - /* * We have an output list that is not necessarily used, and GCC feels * entitled to optimise the whole sequence away. "volatile" is what @@ -494,11 +484,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r1 asm("r1"); \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3"); \ - __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ + CONCATENATE(__declare_arg_, \ + COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ asm volatile(SMCCC_SVE_CHECK \ inst "\n" : \ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ - __constraints(__count_args(__VA_ARGS__))); \ + : CONCATENATE(__constraint_read_, \ + COUNT_ARGS(__VA_ARGS__)) \ + : smccc_sve_clobbers "memory"); \ if (___res) \ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ } while (0) @@ -542,8 +535,12 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define __fail_smccc_1_1(...) \ do { \ - __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ - asm ("" : __constraints(__count_args(__VA_ARGS__))); \ + CONCATENATE(__declare_arg_, \ + COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ + asm ("" : \ + : CONCATENATE(__constraint_read_, \ + COUNT_ARGS(__VA_ARGS__)) \ + : smccc_sve_clobbers "memory"); \ if (___res) \ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ } while (0) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cc060da51bec..c906f666ff5d 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -6,6 +6,7 @@ #ifndef _LINUX_ARM_FFA_H #define _LINUX_ARM_FFA_H +#include <linux/bitfield.h> #include <linux/device.h> #include <linux/module.h> #include <linux/types.h> @@ -20,6 +21,7 @@ #define FFA_ERROR FFA_SMC_32(0x60) #define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_FN64_SUCCESS FFA_SMC_64(0x61) #define FFA_INTERRUPT FFA_SMC_32(0x62) #define FFA_VERSION FFA_SMC_32(0x63) #define FFA_FEATURES FFA_SMC_32(0x64) @@ -54,6 +56,23 @@ #define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) #define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) #define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) +#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D) +#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E) +#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F) +#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80) +#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81) +#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82) +#define FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83) +#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83) +#define FFA_RX_ACQUIRE FFA_SMC_32(0x84) +#define FFA_SPM_ID_GET FFA_SMC_32(0x85) +#define FFA_MSG_SEND2 FFA_SMC_32(0x86) +#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87) +#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87) +#define FFA_MEM_PERM_GET FFA_SMC_32(0x88) +#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) +#define FFA_MEM_PERM_SET FFA_SMC_32(0x89) +#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -76,6 +95,7 @@ #define FFA_RET_DENIED (-6) #define FFA_RET_RETRY (-7) #define FFA_RET_ABORTED (-8) +#define FFA_RET_NO_DATA (-9) /* FFA version encoding */ #define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) @@ -86,6 +106,7 @@ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) +#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) /** * FF-A specification mentions explicitly about '4K pages'. This should @@ -188,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } #define module_ffa_driver(__ffa_driver) \ module_driver(__ffa_driver, ffa_register, ffa_unregister) +extern const struct bus_type ffa_bus_type; + /* FFA transport related */ struct ffa_partition_info { u16 id; @@ -278,8 +301,8 @@ struct ffa_mem_region { #define FFA_MEM_NON_SHAREABLE (0) #define FFA_MEM_OUTER_SHAREABLE (2) #define FFA_MEM_INNER_SHAREABLE (3) - u8 attributes; - u8 reserved_0; + /* Memory region attributes, upper byte MBZ pre v1.1 */ + u16 attributes; /* * Clear memory region contents after unmapping it from the sender and * before mapping it for any receiver. @@ -317,27 +340,41 @@ struct ffa_mem_region { * memory region. */ u64 tag; - u32 reserved_1; + /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */ + u32 ep_mem_size; /* * The number of `ffa_mem_region_attributes` entries included in this * transaction. */ u32 ep_count; /* - * An array of endpoint memory access descriptors. - * Each one specifies a memory region offset, an endpoint and the - * attributes with which this memory region should be mapped in that - * endpoint's page table. + * 16-byte aligned offset from the base address of this descriptor + * to the first element of the endpoint memory access descriptor array + * Valid only from v1.1 */ - struct ffa_mem_region_attributes ep_mem_access[]; + u32 ep_mem_offset; + /* MBZ, valid only from v1.1 */ + u32 reserved[3]; }; -#define COMPOSITE_OFFSET(x) \ - (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) -#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ - (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) + +static inline u32 +ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) +{ + u32 offset = count * sizeof(struct ffa_mem_region_attributes); + /* + * Earlier to v1.1, the endpoint memory descriptor array started at + * offset 32(i.e. offset of ep_mem_offset in the current structure) + */ + if (ffa_version <= FFA_VERSION_1_0) + offset += offsetof(struct ffa_mem_region, ep_mem_offset); + else + offset += sizeof(struct ffa_mem_region); + + return offset; +} struct ffa_mem_ops_args { bool use_txbuf; @@ -367,10 +404,30 @@ struct ffa_mem_ops { int (*memory_lend)(struct ffa_mem_ops_args *args); }; +struct ffa_cpu_ops { + int (*run)(struct ffa_device *dev, u16 vcpu); +}; + +typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); + +struct ffa_notifier_ops { + int (*sched_recv_cb_register)(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data); + int (*sched_recv_cb_unregister)(struct ffa_device *dev); + int (*notify_request)(struct ffa_device *dev, bool per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id); + int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, + u16 vcpu); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; + const struct ffa_cpu_ops *cpu_ops; + const struct ffa_notifier_ops *notifier_ops; }; #endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 14dc461b0e82..255701e1251b 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes); int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); void __init sdei_init(void); +void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } static inline void sdei_init(void) { } +static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/array_size.h b/include/linux/array_size.h new file mode 100644 index 000000000000..06d7d83196ca --- /dev/null +++ b/include/linux/array_size.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ARRAY_SIZE_H +#define _LINUX_ARRAY_SIZE_H + +#include <linux/compiler.h> + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fc..19b778d08600 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously @@ -118,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); +extern void async_init(void); #endif diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h deleted file mode 100644 index 1491af38cc6e..000000000000 --- a/include/linux/atmel-mci.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ATMEL_MCI_H -#define __LINUX_ATMEL_MCI_H - -#include <linux/types.h> -#include <linux/dmaengine.h> - -#define ATMCI_MAX_NR_SLOTS 2 - -/** - * struct mci_slot_pdata - board-specific per-slot configuration - * @bus_width: Number of data lines wired up the slot - * @detect_pin: GPIO pin wired to the card detect switch - * @wp_pin: GPIO pin wired to the write protect sensor - * @detect_is_active_high: The state of the detect pin when it is active - * @non_removable: The slot is not removable, only detect once - * - * If a given slot is not present on the board, @bus_width should be - * set to 0. The other fields are ignored in this case. - * - * Any pins that aren't available should be set to a negative value. - * - * Note that support for multiple slots is experimental -- some cards - * might get upset if we don't get the clock management exactly right. - * But in most cases, it should work just fine. - */ -struct mci_slot_pdata { - unsigned int bus_width; - int detect_pin; - int wp_pin; - bool detect_is_active_high; - bool non_removable; -}; - -/** - * struct mci_platform_data - board-specific MMC/SDcard configuration - * @dma_slave: DMA slave interface to use in data transfers. - * @slot: Per-slot configuration data. - */ -struct mci_platform_data { - void *dma_slave; - dma_filter_fn dma_filter; - struct mci_slot_pdata slot[ATMCI_MAX_NR_SLOTS]; -}; - -#endif /* __LINUX_ATMEL_MCI_H */ diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 18f5744dfb5d..956bcba5dbf2 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -459,8 +472,6 @@ raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); -#elif defined(arch_atomic_read) - return arch_atomic_read(v); #else int ret; @@ -508,8 +519,6 @@ raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); -#elif defined(arch_atomic_set) - arch_atomic_set(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); @@ -1996,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere. * @@ -2024,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere. * @@ -2052,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere. * @@ -2079,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere. * @@ -2103,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere. * @@ -2136,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere. * @@ -2169,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere. * @@ -2201,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere. * @@ -2394,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere. * @@ -2423,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere. * @@ -2443,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere. * @@ -2463,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere. * @@ -2490,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere. * @@ -2517,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere. * @@ -2575,8 +2598,6 @@ raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); -#elif defined(arch_atomic64_read) - return arch_atomic64_read(v); #else s64 ret; @@ -2624,8 +2645,6 @@ raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); -#elif defined(arch_atomic64_set) - arch_atomic64_set(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); @@ -4112,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere. * @@ -4140,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere. * @@ -4168,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere. * @@ -4195,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere. * @@ -4219,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * @@ -4252,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * @@ -4285,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * @@ -4317,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * @@ -4510,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere. * @@ -4539,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere. * @@ -4559,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere. * @@ -4579,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere. * @@ -4606,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere. * @@ -4633,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere. * @@ -4657,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d +// 14850c0b0db20c62fdc78ccd1d42b98b88d76331 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..debd487fe971 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there. * @@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there. * @@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there. * @@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there. * @@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there. * @@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there. * @@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there. * @@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there. * @@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there. * @@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_add_unless() there. * @@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there. * @@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v) * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there. * @@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v) * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there. * @@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v) * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there. * @@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there. * @@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there. * @@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there. * @@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there. * @@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there. * @@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there. * @@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there. * @@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there. * @@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there. * @@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_add_unless() there. * @@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there. * @@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there. * @@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there. * @@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v) * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there. * @@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there. * @@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there. * @@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there. * @@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there. * @@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there. * @@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there. * @@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there. * @@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there. * @@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there. * @@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there. * @@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there. * @@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there. * @@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there. * @@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there. * @@ -4998,6 +5040,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// ce5b65e0f1f8a276268b667194581d24bed219d4 diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h index c82947170ddc..3ef844b3ab8a 100644 --- a/include/linux/atomic/atomic-long.h +++ b/include/linux/atomic/atomic-long.h @@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere. * @@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere. * @@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere. * @@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere. * @@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere. * @@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere. * @@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere. * @@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. - * Otherwise, updates @old to the current value of @v. + * Otherwise, @v is not modified, @old is updated to the current value of @v, + * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere. * @@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere. * @@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere. * @@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u) * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere. * @@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere. * @@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere. * @@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v) * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. + * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere. * @@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v) } #endif /* _LINUX_ATOMIC_LONG_H */ -// 4ef23f98c73cff96d239896175fd26b10b88899e +// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a diff --git a/include/linux/audit.h b/include/linux/audit.h index 6a3a9e122bb5..0050ef288ab3 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -36,6 +36,7 @@ struct mqstat; struct audit_watch; struct audit_tree; struct sk_buff; +struct kern_ipc_perm; struct audit_krule { u32 pflags; @@ -117,6 +118,8 @@ enum audit_nfcfgop { AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_SETELEM_RESET, + AUDIT_NFT_OP_RULE_RESET, AUDIT_NFT_OP_INVALID, }; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index c15221dcb75e..8e177b67e82f 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -4,6 +4,11 @@ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ +#include <linux/bitops.h> +#include <linux/bits.h> +#include <linux/overflow.h> +#include <uapi/linux/if_ether.h> + /* Description: * This header file describes the Virtual Function (VF) - Physical Function * (PF) communication protocol used by the drivers for all devices starting @@ -114,6 +119,7 @@ enum virtchnl_ops { VIRTCHNL_OP_GET_STATS = 15, VIRTCHNL_OP_RSVD = 16, VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + VIRTCHNL_OP_CONFIG_RSS_HFUNC = 18, /* opcode 19 is reserved */ VIRTCHNL_OP_IWARP = 20, /* advanced opcode */ VIRTCHNL_OP_RDMA = VIRTCHNL_OP_IWARP, @@ -240,6 +246,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -268,10 +275,11 @@ struct virtchnl_vf_resource { u32 rss_key_size; u32 rss_lut_size; - struct virtchnl_vsi_resource vsi_res[1]; + struct virtchnl_vsi_resource vsi_res[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_vf_resource); +VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_vf_resource); +#define virtchnl_vf_resource_LEGACY_SIZEOF 36 /* VIRTCHNL_OP_CONFIG_TX_QUEUE * VF sends this message to set up parameters for one TX queue. @@ -294,7 +302,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. */ /* Rx queue config info */ @@ -306,7 +320,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; @@ -340,10 +354,11 @@ struct virtchnl_vsi_queue_config_info { u16 vsi_id; u16 num_queue_pairs; u32 pad; - struct virtchnl_queue_pair_info qpair[1]; + struct virtchnl_queue_pair_info qpair[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vsi_queue_config_info); +#define virtchnl_vsi_queue_config_info_LEGACY_SIZEOF 72 /* VIRTCHNL_OP_REQUEST_QUEUES * VF sends this message to request the PF to allocate additional queues to @@ -385,10 +400,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_vector_map); struct virtchnl_irq_map_info { u16 num_vectors; - struct virtchnl_vector_map vecmap[1]; + struct virtchnl_vector_map vecmap[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info); +VIRTCHNL_CHECK_STRUCT_LEN(2, virtchnl_irq_map_info); +#define virtchnl_irq_map_info_LEGACY_SIZEOF 14 /* VIRTCHNL_OP_ENABLE_QUEUES * VIRTCHNL_OP_DISABLE_QUEUES @@ -459,10 +475,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr); struct virtchnl_ether_addr_list { u16 vsi_id; u16 num_elements; - struct virtchnl_ether_addr list[1]; + struct virtchnl_ether_addr list[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_ether_addr_list); +#define virtchnl_ether_addr_list_LEGACY_SIZEOF 12 /* VIRTCHNL_OP_ADD_VLAN * VF sends this message to add one or more VLAN tag filters for receives. @@ -481,10 +498,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list); struct virtchnl_vlan_filter_list { u16 vsi_id; u16 num_elements; - u16 vlan_id[1]; + u16 vlan_id[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_vlan_filter_list); +#define virtchnl_vlan_filter_list_LEGACY_SIZEOF 6 /* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related * structures and opcodes. @@ -711,10 +729,11 @@ struct virtchnl_vlan_filter_list_v2 { u16 vport_id; u16 num_elements; u8 pad[4]; - struct virtchnl_vlan_filter filters[1]; + struct virtchnl_vlan_filter filters[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan_filter_list_v2); +#define virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF 40 /* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 @@ -866,18 +885,20 @@ VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_promisc_info); struct virtchnl_rss_key { u16 vsi_id; u16 key_len; - u8 key[1]; /* RSS hash key, packed bytes */ + u8 key[]; /* RSS hash key, packed bytes */ }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_key); +#define virtchnl_rss_key_LEGACY_SIZEOF 6 struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; - u8 lut[1]; /* RSS lookup table */ + u8 lut[]; /* RSS lookup table */ }; -VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut); +#define virtchnl_rss_lut_LEGACY_SIZEOF 6 /* VIRTCHNL_OP_GET_RSS_HENA_CAPS * VIRTCHNL_OP_SET_RSS_HENA @@ -892,6 +913,29 @@ struct virtchnl_rss_hena { VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); +/* Type of RSS algorithm */ +enum virtchnl_rss_algorithm { + VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, + VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1, + VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, + VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3, +}; + +/* VIRTCHNL_OP_CONFIG_RSS_HFUNC + * VF sends this message to configure the RSS hash function. Only supported + * if both PF and VF drivers set the VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * configuration negotiation. + * The hash function is initialized to VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC + * by the PF. + */ +struct virtchnl_rss_hfunc { + u16 vsi_id; + u16 rss_algorithm; /* enum virtchnl_rss_algorithm */ + u32 reserved; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hfunc); + /* VIRTCHNL_OP_ENABLE_CHANNELS * VIRTCHNL_OP_DISABLE_CHANNELS * VF sends these messages to enable or disable channels based on @@ -911,10 +955,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info); struct virtchnl_tc_info { u32 num_tc; u32 pad; - struct virtchnl_channel_info list[1]; + struct virtchnl_channel_info list[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info); +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_tc_info); +#define virtchnl_tc_info_LEGACY_SIZEOF 24 /* VIRTCHNL_ADD_CLOUD_FILTER * VIRTCHNL_DEL_CLOUD_FILTER @@ -1052,10 +1097,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_rdma_qv_info); struct virtchnl_rdma_qvlist_info { u32 num_vectors; - struct virtchnl_rdma_qv_info qv_info[1]; + struct virtchnl_rdma_qv_info qv_info[]; }; -VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_rdma_qvlist_info); +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rdma_qvlist_info); +#define virtchnl_rdma_qvlist_info_LEGACY_SIZEOF 16 /* VF reset states - these are written into the RSTAT register: * VFGEN_RSTAT on the VF @@ -1074,14 +1120,6 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; -/* Type of RSS algorithm */ -enum virtchnl_rss_algorithm { - VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, - VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1, - VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, - VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3, -}; - #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) @@ -1367,6 +1405,31 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define __vss_byone(p, member, count, old) \ + (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) + +#define __vss_byelem(p, member, count, old) \ + (struct_size(p, member, count - 1) + (old - struct_size(p, member, 0))) + +#define __vss_full(p, member, count, old) \ + (struct_size(p, member, count) + (old - struct_size(p, member, 0))) + +#define __vss(type, func, p, member, count) \ + struct type: func(p, member, count, type##_LEGACY_SIZEOF) + +#define virtchnl_struct_size(p, m, c) \ + _Generic(*p, \ + __vss(virtchnl_vf_resource, __vss_full, p, m, c), \ + __vss(virtchnl_vsi_queue_config_info, __vss_full, p, m, c), \ + __vss(virtchnl_irq_map_info, __vss_full, p, m, c), \ + __vss(virtchnl_ether_addr_list, __vss_full, p, m, c), \ + __vss(virtchnl_vlan_filter_list, __vss_full, p, m, c), \ + __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ + __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ + __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@ -1401,24 +1464,23 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, valid_len = sizeof(struct virtchnl_rxq_info); break; case VIRTCHNL_OP_CONFIG_VSI_QUEUES: - valid_len = sizeof(struct virtchnl_vsi_queue_config_info); + valid_len = virtchnl_vsi_queue_config_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vsi_queue_config_info *vqc = (struct virtchnl_vsi_queue_config_info *)msg; - valid_len += (vqc->num_queue_pairs * - sizeof(struct - virtchnl_queue_pair_info)); + valid_len = virtchnl_struct_size(vqc, qpair, + vqc->num_queue_pairs); if (vqc->num_queue_pairs == 0) err_msg_format = true; } break; case VIRTCHNL_OP_CONFIG_IRQ_MAP: - valid_len = sizeof(struct virtchnl_irq_map_info); + valid_len = virtchnl_irq_map_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_irq_map_info *vimi = (struct virtchnl_irq_map_info *)msg; - valid_len += (vimi->num_vectors * - sizeof(struct virtchnl_vector_map)); + valid_len = virtchnl_struct_size(vimi, vecmap, + vimi->num_vectors); if (vimi->num_vectors == 0) err_msg_format = true; } @@ -1429,23 +1491,24 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, break; case VIRTCHNL_OP_ADD_ETH_ADDR: case VIRTCHNL_OP_DEL_ETH_ADDR: - valid_len = sizeof(struct virtchnl_ether_addr_list); + valid_len = virtchnl_ether_addr_list_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_ether_addr_list *veal = (struct virtchnl_ether_addr_list *)msg; - valid_len += veal->num_elements * - sizeof(struct virtchnl_ether_addr); + valid_len = virtchnl_struct_size(veal, list, + veal->num_elements); if (veal->num_elements == 0) err_msg_format = true; } break; case VIRTCHNL_OP_ADD_VLAN: case VIRTCHNL_OP_DEL_VLAN: - valid_len = sizeof(struct virtchnl_vlan_filter_list); + valid_len = virtchnl_vlan_filter_list_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vlan_filter_list *vfl = (struct virtchnl_vlan_filter_list *)msg; - valid_len += vfl->num_elements * sizeof(u16); + valid_len = virtchnl_struct_size(vfl, vlan_id, + vfl->num_elements); if (vfl->num_elements == 0) err_msg_format = true; } @@ -1469,31 +1532,36 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP: break; case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP: - valid_len = sizeof(struct virtchnl_rdma_qvlist_info); + valid_len = virtchnl_rdma_qvlist_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rdma_qvlist_info *qv = (struct virtchnl_rdma_qvlist_info *)msg; - valid_len += ((qv->num_vectors - 1) * - sizeof(struct virtchnl_rdma_qv_info)); + valid_len = virtchnl_struct_size(qv, qv_info, + qv->num_vectors); } break; case VIRTCHNL_OP_CONFIG_RSS_KEY: - valid_len = sizeof(struct virtchnl_rss_key); + valid_len = virtchnl_rss_key_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - valid_len += vrk->key_len - 1; + valid_len = virtchnl_struct_size(vrk, key, + vrk->key_len); } break; case VIRTCHNL_OP_CONFIG_RSS_LUT: - valid_len = sizeof(struct virtchnl_rss_lut); + valid_len = virtchnl_rss_lut_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - valid_len += vrl->lut_entries - 1; + valid_len = virtchnl_struct_size(vrl, lut, + vrl->lut_entries); } break; + case VIRTCHNL_OP_CONFIG_RSS_HFUNC: + valid_len = sizeof(struct virtchnl_rss_hfunc); + break; case VIRTCHNL_OP_GET_RSS_HENA_CAPS: break; case VIRTCHNL_OP_SET_RSS_HENA: @@ -1506,12 +1574,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, valid_len = sizeof(struct virtchnl_vf_res_request); break; case VIRTCHNL_OP_ENABLE_CHANNELS: - valid_len = sizeof(struct virtchnl_tc_info); + valid_len = virtchnl_tc_info_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_tc_info *vti = (struct virtchnl_tc_info *)msg; - valid_len += (vti->num_tc - 1) * - sizeof(struct virtchnl_channel_info); + valid_len = virtchnl_struct_size(vti, list, + vti->num_tc); if (vti->num_tc == 0) err_msg_format = true; } @@ -1538,13 +1606,13 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, break; case VIRTCHNL_OP_ADD_VLAN_V2: case VIRTCHNL_OP_DEL_VLAN_V2: - valid_len = sizeof(struct virtchnl_vlan_filter_list_v2); + valid_len = virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF; if (msglen >= valid_len) { struct virtchnl_vlan_filter_list_v2 *vfl = (struct virtchnl_vlan_filter_list_v2 *)msg; - valid_len += (vfl->num_elements - 1) * - sizeof(struct virtchnl_vlan_filter); + valid_len = virtchnl_struct_size(vfl, filters, + vfl->num_elements); if (vfl->num_elements == 0) { err_msg_format = true; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492..2ad261082bba 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fbad4fcd408e..8e7af9a03b41 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); -void wb_wakeup_delayed(struct bdi_writeback *wb); void wb_wait_for_completion(struct wb_completion *done); @@ -46,7 +45,6 @@ extern spinlock_t bdi_lock; extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; -extern struct workqueue_struct *bdi_async_bio_wq; static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h new file mode 100644 index 000000000000..3f1fe1774f1b --- /dev/null +++ b/include/linux/backing-file.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common helpers for stackable filesystems and backing files. + * + * Copyright (C) 2023 CTERA Networks. + */ + +#ifndef _LINUX_BACKING_FILE_H +#define _LINUX_BACKING_FILE_H + +#include <linux/file.h> +#include <linux/uio.h> +#include <linux/fs.h> + +struct backing_file_ctx { + const struct cred *cred; + struct file *user_file; + void (*accessed)(struct file *); + void (*end_write)(struct file *); +}; + +struct file *backing_file_open(const struct path *user_path, int flags, + const struct path *real_path, + const struct cred *cred); +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); +int backing_file_mmap(struct file *file, struct vm_area_struct *vma, + struct backing_file_ctx *ctx); + +#endif /* _LINUX_BACKING_FILE_H */ diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2426276b9bd3..670f2dae692f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -15,6 +15,7 @@ #define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) #define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) #define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x)) #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) /* Bad block numbers are stored sorted in a single page. @@ -41,6 +42,12 @@ struct badblocks { sector_t size; /* in sectors */ }; +struct badblocks_context { + sector_t start; + sector_t len; + int ack; +}; + int badblocks_check(struct badblocks *bb, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors); int badblocks_set(struct badblocks *bb, sector_t s, int sectors, @@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) } badblocks_exit(bb); } + +static inline int badblocks_full(struct badblocks *bb) +{ + return (bb->count >= MAX_BADBLOCKS); +} + +static inline int badblocks_empty(struct badblocks *bb) +{ + return (bb->count == 0); +} + +static inline void set_changed(struct badblocks *bb) +{ + if (bb->changed != 1) + bb->changed = 1; +} + +static inline void clear_changed(struct badblocks *bb) +{ + if (bb->changed != 0) + bb->changed = 0; +} + #endif diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8d51f69f9f5e..70f97f685bff 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,6 +90,16 @@ struct linux_binfmt { #endif } __randomize_layout; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc { + struct list_head entries; + rwlock_t entries_lock; + bool enabled; +} __randomize_layout; + +extern struct binfmt_misc init_binfmt_misc; +#endif + extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 11984ed29cb8..875d792bffff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -253,6 +253,11 @@ static inline struct page *bio_first_page_all(struct bio *bio) return bio_first_bvec_all(bio)->bv_page; } +static inline struct folio *bio_first_folio_all(struct bio *bio) +{ + return page_folio(bio_first_page_all(bio)); +} + static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); @@ -281,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, { struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + if (unlikely(i >= bio->bi_vcnt)) { + fi->folio = NULL; + return; + } + fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); @@ -298,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); fi->_next = folio_next(fi->folio); - } else if (fi->_i + 1 < bio->bi_vcnt) { - bio_first_folio(fi, bio, fi->_i + 1); } else { - fi->folio = NULL; + bio_first_folio(fi, bio, fi->_i + 1); } } @@ -319,6 +327,8 @@ enum bip_flags { BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ + BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */ + BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */ }; /* @@ -488,7 +498,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); -void zero_fill_bio(struct bio *bio); +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + zero_fill_bio_iter(bio, bio->bi_iter); +} static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { @@ -708,6 +723,7 @@ static inline bool bioset_initialized(struct bio_set *bs) for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); @@ -779,6 +795,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, return 0; } +static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, + ssize_t len, u32 seed) +{ + return -EINVAL; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ /* diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index ebfa12f69501..63928f173223 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -66,7 +66,8 @@ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ - ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + ~((_mask) >> __bf_shf(_mask)) & \ + (0 + (_val)) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ __bf_cast_unsigned(_reg, ~0ull), \ diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h new file mode 100644 index 000000000000..17caeca94cab --- /dev/null +++ b/include/linux/bitmap-str.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITMAP_STR_H +#define __LINUX_BITMAP_STR_H + +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); +extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); + +#endif /* __LINUX_BITMAP_STR_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 03644237e1ef..aa4096126553 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,10 +6,13 @@ #include <linux/align.h> #include <linux/bitops.h> +#include <linux/cleanup.h> +#include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/bitmap-str.h> struct device; @@ -52,6 +55,7 @@ struct device; * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap + * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -60,6 +64,8 @@ struct device; * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -125,6 +131,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); +DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T)) + /* Managed variants of the above. */ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); @@ -167,6 +175,8 @@ bool __bitmap_subset(const unsigned long *bitmap1, unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); unsigned int __bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -200,14 +210,6 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *dst, int nbits); -int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); -int bitmap_parselist(const char *buf, unsigned long *maskp, - int nmaskbits); -int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, @@ -216,23 +218,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); -int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); -void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); -int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); - -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); -#else -#define bitmap_copy_le bitmap_copy -#endif -int bitmap_print_to_pagebuf(bool list, char *buf, - const unsigned long *maskp, int nmaskbits); - -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -448,6 +433,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1, return __bitmap_weight_and(src1, src2, nbits); } +static __always_inline +unsigned long bitmap_weight_andnot(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_andnot(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { @@ -510,6 +504,107 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } +/** + * bitmap_scatter - Scatter a bitmap according to the given mask + * @dst: scattered bitmap + * @src: gathered bitmap + * @mask: mask representing bits to assign to in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Scatters bitmap with sequential bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. + * + * Or in binary form + * @src @mask @dst + * 0000000001011010 0001001100010011 0000001100000010 + * + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) + * + * A more 'visual' description of the operation:: + * + * src: 0000000001011010 + * |||||| + * +------+||||| + * | +----+|||| + * | |+----+||| + * | || +-+|| + * | || | || + * mask: ...v..vv...v..vv + * ...0..11...0..10 + * dst: 0000001100000010 + * + * A relationship exists between bitmap_scatter() and bitmap_gather(). + * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. + * See bitmap_scatter() for details related to this relationship. + */ +static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(bit, dst, test_bit(n++, src)); +} + +/** + * bitmap_gather - Gather a bitmap according to given mask + * @dst: gathered bitmap + * @src: scattered bitmap + * @mask: mask representing bits to extract from in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Gathers bitmap with sparse bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. + * + * Or in binary form + * @src @mask @dst + * 0000001100000010 0001001100010011 0000000000011010 + * + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) + * + * A more 'visual' description of the operation:: + * + * mask: ...v..vv...v..vv + * src: 0000001100000010 + * ^ ^^ ^ 0 + * | || | 10 + * | || > 010 + * | |+--> 1010 + * | +--> 11010 + * +----> 011010 + * dst: 0000000000011010 + * + * A relationship exists between bitmap_gather() and bitmap_scatter(). See + * bitmap_scatter() for the bitmap scatter detailed operations. + * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). + * The operation bitmap_gather(result, scattered, mask, n) leads to a result + * equal or equivalent to src. + * + * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather() + * are not bijective. + * The result and src values are equivalent in that sense that a call to + * bitmap_scatter(res, src, mask, n) and a call to + * bitmap_scatter(res, result, mask, n) will lead to the same res value. + */ +static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(n++, dst, test_bit(bit, src)); +} + static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -519,6 +614,66 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, } /** + * bitmap_release_region - release allocated bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to release + * @order: region size (log base 2 of number of bits) to release + * + * This is the complement to __bitmap_find_free_region() and releases + * the found region (by clearing it in the bitmap). + */ +static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +{ + bitmap_clear(bitmap, pos, BIT(order)); +} + +/** + * bitmap_allocate_region - allocate bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to allocate + * @order: region size (log base 2 of number of bits) to allocate + * + * Allocate (set bits in) a specified region of a bitmap. + * + * Returns: 0 on success, or %-EBUSY if specified region wasn't + * free (not all bits were zero). + */ +static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +{ + unsigned int len = BIT(order); + + if (find_next_bit(bitmap, pos + len, pos) < pos + len) + return -EBUSY; + bitmap_set(bitmap, pos, len); + return 0; +} + +/** + * bitmap_find_free_region - find a contiguous aligned mem region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @bits: number of bits in the bitmap + * @order: region size (log base 2 of number of bits) to find + * + * Find a region of free (zero) bits in a @bitmap of @bits bits and + * allocate them (set them to one). Only consider regions of length + * a power (@order) of two, aligned to that power of two, which + * makes the search algorithm much faster. + * + * Returns: the bit offset in bitmap of the allocated region, + * or -errno on failure. + */ +static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +{ + unsigned int pos, end; /* scans bitmap by regions of size order */ + + for (pos = 0; (end = pos + BIT(order)) <= bits; pos = end) { + if (!bitmap_allocate_region(bitmap, pos, order)) + return pos; + } + return -ENOMEM; +} + +/** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe..0eb24d21aac2 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <vdso/bits.h> +#include <uapi/linux/bits.h> #include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) @@ -30,15 +31,8 @@ #define GENMASK_INPUT_CHECK(h, l) 0 #endif -#define __GENMASK(h, l) \ - (((~UL(0)) - (UL(1) << (l)) + 1) & \ - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) - -#define __GENMASK_ULL(h, l) \ - (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ - (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 378b2459efe2..e253e7bd0d17 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -20,6 +20,7 @@ struct blk_integrity_iter { unsigned int data_size; unsigned short interval; unsigned char tuple_size; + unsigned char pi_offset; const char *disk_name; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 495ca198775f..d3d8fd8e229b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> +#include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; @@ -32,8 +33,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ @@ -137,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; @@ -178,14 +178,10 @@ struct request { struct { unsigned int seq; - struct list_head list; rq_end_io_fn *saved_end_io; } flush; - union { - struct __call_single_data csd; - u64 fifo_time; - }; + u64 fifo_time; /* * completion callback. @@ -397,9 +393,6 @@ struct blk_mq_hw_ctx { */ struct blk_mq_tags *sched_tags; - /** @run: Number of dispatched requests. */ - unsigned long run; - /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ @@ -691,17 +684,19 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); @@ -836,6 +831,12 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { + /* + * passthrough io doesn't use iostat accounting, cgroup stats + * and io scheduler functionalities. + */ + if (blk_rq_is_passthrough(rq)) + return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index 2580e05a8ab6..004b38a538ff 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); extern void blk_post_runtime_resume(struct request_queue *q); -extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, struct device *dev) {} diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0bad62cca3d0..cb1526ec44b5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include <linux/bvec.h> #include <linux/device.h> #include <linux/ktime.h> +#include <linux/rw_hint.h> struct bio_set; struct bio; @@ -49,27 +50,26 @@ struct block_device { bool bd_write_holder; bool bd_has_submit_bio; dev_t bd_dev; + struct inode *bd_inode; /* will die */ + atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; void * bd_claiming; void * bd_holder; const struct blk_holder_ops *bd_holder_ops; struct mutex bd_holder_lock; - /* The counter of freeze processes */ - int bd_fsfreeze_count; int bd_holders; struct kobject *bd_holder_dir; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; - struct super_block *bd_fsfreeze_sb; + atomic_t bd_fsfreeze_count; /* number of freeze requests */ + struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */ struct partition_meta_info *bd_meta_info; #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + bool bd_ro_warned; + int bd_writers; /* * keep this out-of-line as it's both big and not needed in the fast * path @@ -207,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error) return true; } -/* - * From most significant bit: - * 1 bit: reserved for other usage, see below - * 12 bits: original size of bio - * 51 bits: issue time of bio - */ -#define BIO_ISSUE_RES_BITS 1 -#define BIO_ISSUE_SIZE_BITS 12 -#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) -#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) -#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) -#define BIO_ISSUE_SIZE_MASK \ - (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) -#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) - -/* Reserved bit for blk-throtl */ -#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) - struct bio_issue { u64 value; }; -static inline u64 __bio_issue_time(u64 time) -{ - return time & BIO_ISSUE_TIME_MASK; -} - -static inline u64 bio_issue_time(struct bio_issue *issue) -{ - return __bio_issue_time(issue->value); -} - -static inline sector_t bio_issue_size(struct bio_issue *issue) -{ - return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); -} - -static inline void bio_issue_init(struct bio_issue *issue, - sector_t size) -{ - size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; - issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | - (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | - ((u64)size << BIO_ISSUE_SIZE_SHIFT)); -} - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -270,6 +228,7 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; @@ -379,6 +338,8 @@ enum req_op { REQ_OP_DISCARD = (__force blk_opf_t)3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, + /* write data at the current zone write pointer */ + REQ_OP_ZONE_APPEND = (__force blk_opf_t)7, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ @@ -387,12 +348,10 @@ enum req_op { REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, - /* write data at the current zone write pointer */ - REQ_OP_ZONE_APPEND = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)13, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 87d94be7825a..69e7da33ca49 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include <linux/sbitmap.h> #include <linux/uuid.h> #include <linux/xarray.h> +#include <linux/file.h> struct module; struct request_queue; @@ -42,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg policies allowed to be registered concurrently. @@ -108,6 +109,7 @@ struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; unsigned char tuple_size; + unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; }; @@ -124,6 +126,10 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) /* open for "writes" only for ioctls (specialy hack for floppy.c) */ #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) +/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ +#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) +/* return partition scanning errors */ +#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6)) struct gendisk { /* @@ -187,8 +193,6 @@ struct gendisk { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned int max_open_zones; - unsigned int max_active_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -264,18 +268,6 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) } /* - * Zoned block device models (zoned limit). - * - * Note: This needs to be ordered from the least to the most severe - * restrictions for the inheritance in blk_stack_limits() to work. - */ -enum blk_zoned_model { - BLK_ZONED_NONE = 0, /* Regular block device */ - BLK_ZONED_HA, /* Host-aware zoned block device */ - BLK_ZONED_HM, /* Host-managed zoned block device */ -}; - -/* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages */ @@ -302,6 +294,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; @@ -316,7 +309,9 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - enum blk_zoned_model zoned; + bool zoned; + unsigned int max_open_zones; + unsigned int max_active_zones; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -329,24 +324,15 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk); -#ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, - gfp_t gfp_mask); + unsigned int nr_zones, report_zones_cb cb, void *data); +int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, + sector_t sectors, sector_t nr_sectors); int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return 0; -} -#endif /* CONFIG_BLK_DEV_ZONED */ + void (*update_driver_data)(struct gendisk *disk)); /* * Independent access ranges: struct blk_independent_access_range describes @@ -376,59 +362,51 @@ struct blk_independent_access_ranges { }; struct request_queue { - struct request *last_merge; - struct elevator_queue *elevator; - - struct percpu_ref q_usage_counter; + /* + * The queue owner gets to use this for whatever they like. + * ll_rw_blk doesn't touch it. + */ + void *queuedata; - struct blk_queue_stats *stats; - struct rq_qos *rq_qos; - struct mutex rq_qos_mutex; + struct elevator_queue *elevator; const struct blk_mq_ops *mq_ops; /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; + /* + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; + + unsigned int rq_timeout; + unsigned int queue_depth; + refcount_t refs; + /* hw dispatch queues */ - struct xarray hctx_table; unsigned int nr_hw_queues; + struct xarray hctx_table; - /* - * The queue owner gets to use this for whatever they like. - * ll_rw_blk doesn't touch it. - */ - void *queuedata; - - /* - * various queue flags, see QUEUE_* below - */ - unsigned long queue_flags; - /* - * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM requests are processed. - */ - atomic_t pm_only; + struct percpu_ref q_usage_counter; - /* - * ida allocated id for this queue. Used to index queues from - * ioctx. - */ - int id; + struct request *last_merge; spinlock_t queue_lock; - struct gendisk *disk; + int quiesce_depth; - refcount_t refs; + struct gendisk *disk; /* * mq queue kobject */ struct kobject *mq_kobj; + struct queue_limits limits; + #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -439,24 +417,40 @@ struct request_queue { #endif /* - * queue settings + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM requests are processed. */ - unsigned long nr_requests; /* Max # of requests */ + atomic_t pm_only; + + struct blk_queue_stats *stats; + struct rq_qos *rq_qos; + struct mutex rq_qos_mutex; + + /* + * ida allocated id for this queue. Used to index queues from + * ioctx. + */ + int id; unsigned int dma_pad_mask; + /* + * queue settings + */ + unsigned long nr_requests; /* Max # of requests */ + #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; struct kobject *crypto_kobject; #endif - unsigned int rq_timeout; - struct timer_list timeout; struct work_struct timeout_work; atomic_t nr_active_requests_shared_tags; + unsigned int required_elevator_features; + struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; @@ -467,11 +461,12 @@ struct request_queue { struct mutex blkcg_mutex; #endif - struct queue_limits limits; + int node; - unsigned int required_elevator_features; + spinlock_t requeue_lock; + struct list_head requeue_list; + struct delayed_work requeue_work; - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -481,12 +476,9 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; - struct list_head requeue_list; - spinlock_t requeue_lock; - struct delayed_work requeue_work; - struct mutex sysfs_lock; struct mutex sysfs_dir_lock; + struct mutex limits_lock; /* * for reusing dead hctx instance in case of updating @@ -509,8 +501,6 @@ struct request_queue { */ struct mutex mq_freeze_lock; - int quiesce_depth; - struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; @@ -538,6 +528,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ +#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -622,26 +613,14 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) } #endif -static inline enum blk_zoned_model -blk_queue_zoned_model(struct request_queue *q) -{ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) - return q->limits.zoned; - return BLK_ZONED_NONE; -} - static inline bool blk_queue_is_zoned(struct request_queue *q) { - switch (blk_queue_zoned_model(q)) { - case BLK_ZONED_HA: - case BLK_ZONED_HM: - return true; - default: - return false; - } + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; } #ifdef CONFIG_BLK_DEV_ZONED +unsigned int bdev_nr_zones(struct block_device *bdev); + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; @@ -666,26 +645,31 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->max_open_zones = max_open_zones; + disk->queue->limits.max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->max_active_zones = max_active_zones; + disk->queue->limits.max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->max_open_zones; + return bdev->bd_disk->queue->limits.max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->max_active_zones; + return bdev->bd_disk->queue->limits.max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return 0; +} + static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; @@ -750,7 +734,8 @@ static inline int bdev_read_only(struct block_device *bdev) } bool set_capacity_and_notify(struct gendisk *disk, sector_t size); -bool disk_force_media_change(struct gendisk *disk, unsigned int events); +void disk_force_media_change(struct gendisk *disk); +void bdev_mark_dead(struct block_device *bdev, bool surprise); void add_disk_randomness(struct gendisk *disk) __latent_entropy; void rand_initialize_disk(struct gendisk *disk); @@ -784,22 +769,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. + * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, @@ -809,7 +798,6 @@ int __register_blkdev(unsigned int major, const char *name, void unregister_blkdev(unsigned int major, const char *name); bool disk_check_media_change(struct gendisk *disk); -int __invalidate_device(struct block_device *bdev, bool kill_dirty); void set_capacity(struct gendisk *disk, sector_t size); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED @@ -846,6 +834,7 @@ extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); +const char *blk_status_to_str(blk_status_t status); /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) @@ -882,6 +871,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } +/** + * queue_limits_start_update - start an atomic update of queue limits + * @q: queue to update + * + * This functions starts an atomic update of the queue limits. It takes a lock + * to prevent other updates and returns a snapshot of the current limits that + * the caller can modify. The caller must call queue_limits_commit_update() + * to finish the update. + * + * Context: process context. The caller must have frozen the queue or ensured + * that there is outstanding I/O by other means. + */ +static inline struct queue_limits +queue_limits_start_update(struct request_queue *q) + __acquires(q->limits_lock) +{ + mutex_lock(&q->limits_lock); + return q->limits; +} +int queue_limits_commit_update(struct request_queue *q, + struct queue_limits *lim); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + /* * Access functions for manipulating queue properties */ @@ -915,8 +927,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, - sector_t offset); +void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, + sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); @@ -963,6 +975,7 @@ struct blk_plug { /* if ios_left is > 1, we can batch tag/rq allocations */ struct request *cached_rq; + u64 cur_ktime; unsigned short nr_ios; unsigned short rq_count; @@ -993,6 +1006,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } +/* + * tsk == current here + */ +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (plug) + plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; +} + int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1016,6 +1041,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ +} + static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; @@ -1078,7 +1107,14 @@ enum blk_default_limits { BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, }; -#define BLK_DEF_MAX_SECTORS 2560u +/* + * Default upper limit for the software max_sectors limit used for + * regular file system I/O. This can be increased through sysfs. + * + * Not to be confused with the max_hw_sector limit that is entirely + * controlled by the driver, usually based on hardware limits. + */ +#define BLK_DEF_MAX_SECTORS_CAP 2560u static inline unsigned long queue_segment_boundary(const struct request_queue *q) { @@ -1257,11 +1293,6 @@ static inline bool bdev_nowait(struct block_device *bdev) return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); } -static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) -{ - return blk_queue_zoned_model(bdev_get_queue(bdev)); -} - static inline bool bdev_is_zoned(struct block_device *bdev) { return blk_queue_is_zoned(bdev_get_queue(bdev)); @@ -1460,30 +1491,53 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #endif struct blk_holder_ops { - void (*mark_dead)(struct block_device *bdev); + void (*mark_dead)(struct block_device *bdev, bool surprise); + + /* + * Sync the file system mounted on the block device. + */ + void (*sync)(struct block_device *bdev); + + /* + * Freeze the file system mounted on the block device. + */ + int (*freeze)(struct block_device *bdev); + + /* + * Thaw the file system mounted on the block device. + */ + int (*thaw)(struct block_device *bdev); }; /* + * For filesystems using @fs_holder_ops, the @holder argument passed to + * helpers used to open and claim block devices via + * bd_prepare_to_claim() must point to a superblock. + */ +extern const struct blk_holder_ops fs_holder_ops; + +/* * Return the correct open flags for blkdev_get_by_* for super block flags * as stored in sb->s_flags. */ #define sb_open_mode(flags) \ - (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) + (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ + (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, void *holder); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); +struct block_device *file_bdev(struct file *bdev_file); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); @@ -1521,10 +1575,9 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) } #endif /* CONFIG_BLOCK */ -int fsync_bdev(struct block_device *bdev); - -int freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev); +int bdev_freeze(struct block_device *bdev); +int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df..3f4b4ac527ca 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +bool __init cmdline_has_extra_options(void); #else /* !__KERNEL__ */ /* * NOTE: This is only for tools/bootconfig, because tools/bootconfig will @@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index e1a3c9c9754c..cffa38a73618 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -60,7 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page, static inline void free_bootmem_page(struct page *page) { - kmemleak_free_part(page_to_virt(page), PAGE_SIZE); + kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); free_reserved_page(page); } #endif diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 57e9e109257e..fb3c3e7181e6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -137,11 +143,12 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, - int *optname, char __user *optval, + int *optname, sockptr_t optval, int *optlen, char **kernel_optval); + int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen, int max_optlen, + int optname, sockptr_t optval, + sockptr_t optlen, int max_optlen, int retval); int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, @@ -189,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ - cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \ + sk_fullsock(sk)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ @@ -199,9 +207,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ + if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ @@ -230,22 +238,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +264,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +284,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -377,7 +394,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - get_user(__ret, optlen); \ + copy_from_sockptr(&__ret, optlen, sizeof(int)); \ __ret; \ }) @@ -477,24 +494,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f58895830ada..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -29,6 +29,7 @@ #include <linux/rcupdate_trace.h> #include <linux/static_call.h> #include <linux/memcontrol.h> +#include <linux/cfi.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -36,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -51,11 +53,15 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); @@ -106,7 +112,11 @@ struct bpf_map_ops { /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); - void (*map_fd_put_ptr)(void *ptr); + /* If need_defer is true, the implementation should guarantee that + * the to-be-put element is still alive before the bpf program, which + * may manipulate it, exists. + */ + void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, @@ -130,6 +140,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -180,14 +193,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -228,11 +242,20 @@ struct btf_record { struct btf_field fields[]; }; +/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */ +struct bpf_rb_node_kern { + struct rb_node rb_node; + void *owner; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */ +struct bpf_list_node_kern { + struct list_head list_head; + void *owner; +} __attribute__((aligned(8))); + struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -254,13 +277,14 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; - atomic64_t usercnt; - struct work_struct work; struct mutex freeze_mutex; + atomic64_t refcnt; + atomic64_t usercnt; + /* rcu is used before freeing and work is only used during freeing */ + union { + struct work_struct work; + struct rcu_head rcu; + }; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -275,6 +299,10 @@ struct bpf_map { } owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ + bool free_after_mult_rcu_gp; + bool free_after_rcu_gp; + atomic64_t sleepable_refcnt; + s64 __percpu *elem_count; }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -287,6 +315,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -312,6 +342,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -338,6 +369,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -376,6 +408,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); @@ -425,7 +458,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) size /= sizeof(long); while (size--) - *ldst++ = *lsrc++; + data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ @@ -496,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -640,7 +673,8 @@ enum bpf_type_flag { MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. - * Currently only valid for linked-list and rbtree nodes. + * Currently only valid for linked-list and rbtree nodes. If the nodes + * have a bpf_refcount_field, they must be tagged MEM_RCU as well. */ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), @@ -678,6 +712,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -849,6 +884,7 @@ enum bpf_reg_type { * an explicit null check is required for this struct. */ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ @@ -889,10 +925,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { @@ -1015,6 +1055,22 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + +/* + * Indicate the trampoline should be suitable to receive indirect calls; + * without this indirectly calling the generated code can result in #UD/#CP, + * depending on the CFI options. + * + * Used by bpf_struct_ops. + * + * Incompatible with FENTRY usage, overloads @func_addr argument. + */ +#define BPF_TRAMP_F_INDIRECT BIT(8) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -1054,10 +1110,17 @@ struct bpf_tramp_run_ctx; * fexit = a set of program to run after original function */ struct bpf_tramp_image; -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call); + void *func_addr); +void *arch_alloc_bpf_trampoline(unsigned int size); +void arch_free_bpf_trampoline(void *image, unsigned int size); +void arch_protect_bpf_trampoline(void *image, unsigned int size); +void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr); + u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, @@ -1090,6 +1153,7 @@ enum bpf_tramp_prog_type { struct bpf_tramp_image { void *image; + int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; @@ -1125,7 +1189,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1159,7 +1222,11 @@ struct bpf_dispatcher { #endif }; -static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( +#ifndef __bpfcall +#define __bpfcall __nocfi +#endif + +static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, bpf_func_t bpf_func) @@ -1197,6 +1264,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); @@ -1249,7 +1318,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func #define DEFINE_BPF_DISPATCHER(name) \ __BPF_DISPATCHER_SC(name); \ - noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ + noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ @@ -1272,7 +1341,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1293,7 +1362,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) @@ -1316,6 +1385,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) struct bpf_func_info_aux { u16 linkage; bool unreliable; + bool called : 1; + bool verified : 1; }; enum bpf_jit_poke_reason { @@ -1344,6 +1415,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1364,6 +1436,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; @@ -1380,10 +1453,13 @@ struct bpf_prog_aux { bool dev_bound; /* Program is bound to the netdev. */ bool offload_requested; /* Program is bound and offloaded to the netdev. */ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; + struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1394,6 +1470,9 @@ struct bpf_prog_aux { struct bpf_kfunc_desc_tab *kfunc_tab; struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; +#ifdef CONFIG_FINEIBT + struct bpf_ksym ksym_prefix; +#endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; @@ -1406,9 +1485,11 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64); #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1459,7 +1540,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -1492,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); @@ -1533,10 +1629,82 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 +/** + * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to + * define a BPF_MAP_TYPE_STRUCT_OPS map type composed + * of BPF_PROG_TYPE_STRUCT_OPS progs. + * @verifier_ops: A structure of callbacks that are invoked by the verifier + * when determining whether the struct_ops progs in the + * struct_ops map are valid. + * @init: A callback that is invoked a single time, and before any other + * callback, to initialize the structure. A nonzero return value means + * the subsystem could not be initialized. + * @check_member: When defined, a callback invoked by the verifier to allow + * the subsystem to determine if an entry in the struct_ops map + * is valid. A nonzero return value means that the map is + * invalid and should be rejected by the verifier. + * @init_member: A callback that is invoked for each member of the struct_ops + * map to allow the subsystem to initialize the member. A nonzero + * value means the member could not be initialized. This callback + * is exclusive with the @type, @type_id, @value_type, and + * @value_id fields. + * @reg: A callback that is invoked when the struct_ops map has been + * initialized and is being attached to. Zero means the struct_ops map + * has been successfully registered and is live. A nonzero return value + * means the struct_ops map could not be registered. + * @unreg: A callback that is invoked when the struct_ops map should be + * unregistered. + * @update: A callback that is invoked when the live struct_ops map is being + * updated to contain new values. This callback is only invoked when + * the struct_ops map is loaded with BPF_F_LINK. If not defined, the + * it is assumed that the struct_ops map cannot be updated. + * @validate: A callback that is invoked after all of the members have been + * initialized. This callback should perform static checks on the + * map, meaning that it should either fail or succeed + * deterministically. A struct_ops map that has been validated may + * not necessarily succeed in being registered if the call to @reg + * fails. For example, a valid struct_ops map may be loaded, but + * then fail to be registered due to there being another active + * struct_ops map on the system in the subsystem already. For this + * reason, if this callback is not defined, the check is skipped as + * the struct_ops map will have final verification performed in + * @reg. + * @type: BTF type. + * @value_type: Value type. + * @name: The name of the struct bpf_struct_ops object. + * @func_models: Func models + * @type_id: BTF type id. + * @value_id: BTF value id. + */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); @@ -1550,18 +1718,64 @@ struct bpf_struct_ops { void (*unreg)(void *kdata); int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + void *cfi_stubs; + struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +/* Every member of a struct_ops type has an instance even a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for an given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; +}; + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developer to register a struct_ops type and generate + * type information correctly. Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1569,7 +1783,10 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, - void *image, void *image_end); + void *stub_func, + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1602,15 +1819,13 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1629,6 +1844,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} #endif @@ -1806,6 +2028,7 @@ struct bpf_cg_run_ctx { struct bpf_trace_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; + bool is_uprobe; }; struct bpf_tramp_run_ctx { @@ -1854,6 +2077,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, if (unlikely(!array)) return ret; + run_ctx.is_uprobe = false; + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; @@ -1878,8 +2103,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) +bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; @@ -1893,20 +2118,22 @@ bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, rcu_read_lock_trace(); migrate_disable(); + run_ctx.is_uprobe = true; + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); if (unlikely(!array)) goto out; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); @@ -1938,6 +2165,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -1977,6 +2205,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); @@ -2004,6 +2233,8 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG_KMEM void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); @@ -2040,26 +2271,57 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, } #endif +static inline int +bpf_map_init_elem_count(struct bpf_map *map) +{ + size_t size = sizeof(*map->elem_count), align = size; + gfp_t flags = GFP_USER | __GFP_NOWARN; + + map->elem_count = bpf_map_alloc_percpu(map, size, align, flags); + if (!map->elem_count) + return -ENOMEM; + + return 0; +} + +static inline void +bpf_map_free_elem_count(struct bpf_map *map) +{ + free_percpu(map->elem_count); +} + +static inline void bpf_map_inc_elem_count(struct bpf_map *map) +{ + this_cpu_inc(*map->elem_count); +} + +static inline void bpf_map_dec_elem_count(struct bpf_map *map) +{ + this_cpu_dec(*map->elem_count); +} + extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2073,12 +2335,24 @@ void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2308,19 +2582,19 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2367,6 +2641,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); + +bool dev_check_flush(void); +bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2436,6 +2713,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } @@ -2559,7 +2854,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } @@ -2619,6 +2914,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) } #endif /* CONFIG_BPF_SYSCALL */ +static __always_inline int +bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) +{ + int ret = -EFAULT; + + if (IS_ENABLED(CONFIG_BPF_EVENTS)) + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); + if (unlikely(ret < 0)) + memset(dst, 0, size); + return ret; +} + void __bpf_free_used_btfs(struct bpf_prog_aux *aux, struct btf_mod_pair *used_btfs, u32 len); @@ -2799,6 +3106,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, @@ -3028,6 +3351,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); @@ -3077,4 +3403,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index 3929be5743f4..aaf004d94322 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -11,6 +11,8 @@ struct bpf_mem_caches; struct bpf_mem_alloc { struct bpf_mem_caches __percpu *caches; struct bpf_mem_cache __percpu *cache; + struct obj_cgroup *objcg; + bool percpu; struct work_struct work; }; @@ -20,17 +22,26 @@ struct bpf_mem_alloc { * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects. * Alloc and free are done with bpf_mem_{alloc,free}() and the size of * the returned object is given by the size argument of bpf_mem_alloc(). + * If percpu equals true, error will be returned in order to avoid + * large memory consumption and the below bpf_mem_alloc_percpu_unit_init() + * should be used to do on-demand per-cpu allocation for each size. */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); +/* Initialize a non-fix-size percpu memory allocator */ +int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg); +/* The percpu allocation with a specific unit size. */ +int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); /* kmalloc/kfree equivalent: */ void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size); void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr); /* kmem_cache_alloc/free equivalent: */ void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma); void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr); void bpf_mem_cache_raw_free(void *ptr); void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags); diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h new file mode 100644 index 000000000000..929225f7b095 --- /dev/null +++ b/include/linux/bpf_mprog.h @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Isovalent */ +#ifndef __BPF_MPROG_H +#define __BPF_MPROG_H + +#include <linux/bpf.h> + +/* bpf_mprog framework: + * + * bpf_mprog is a generic layer for multi-program attachment. In-kernel users + * of the bpf_mprog don't need to care about the dependency resolution + * internals, they can just consume it with few API calls. Currently available + * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of + * a BPF program or BPF link relative to an existing BPF program or BPF link + * inside the multi-program array as well as prepend and append behavior if + * no relative object was specified, see corresponding selftests for concrete + * examples (e.g. tc_links and tc_opts test cases of test_progs). + * + * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code: + * + * Attach case: + * + * struct bpf_mprog_entry *entry, *entry_new; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_attach(entry, &entry_new, [...]); + * if (!ret) { + * if (entry != entry_new) { + * // swap @entry to @entry_new at attach location + * // ensure there are no inflight users of @entry: + * synchronize_rcu(); + * } + * bpf_mprog_commit(entry); + * } else { + * // error path, bail out, propagate @ret + * } + * // bpf_mprog user-side unlock + * + * Detach case: + * + * struct bpf_mprog_entry *entry, *entry_new; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_detach(entry, &entry_new, [...]); + * if (!ret) { + * // all (*) marked is optional and depends on the use-case + * // whether bpf_mprog_bundle should be freed or not + * if (!bpf_mprog_total(entry_new)) (*) + * entry_new = NULL (*) + * // swap @entry to @entry_new at attach location + * // ensure there are no inflight users of @entry: + * synchronize_rcu(); + * bpf_mprog_commit(entry); + * if (!entry_new) (*) + * // free bpf_mprog_bundle (*) + * } else { + * // error path, bail out, propagate @ret + * } + * // bpf_mprog user-side unlock + * + * Query case: + * + * struct bpf_mprog_entry *entry; + * int ret; + * + * // bpf_mprog user-side lock + * // fetch active @entry from attach location + * [...] + * ret = bpf_mprog_query(attr, uattr, entry); + * // bpf_mprog user-side unlock + * + * Data/fast path: + * + * struct bpf_mprog_entry *entry; + * struct bpf_mprog_fp *fp; + * struct bpf_prog *prog; + * int ret = [...]; + * + * rcu_read_lock(); + * // fetch active @entry from attach location + * [...] + * bpf_mprog_foreach_prog(entry, fp, prog) { + * ret = bpf_prog_run(prog, [...]); + * // process @ret from program + * } + * [...] + * rcu_read_unlock(); + * + * bpf_mprog locking considerations: + * + * bpf_mprog_{attach,detach,query}() must be protected by an external lock + * (like RTNL in case of tcx). + * + * bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx + * the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets + * updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of + * the bpf_mprog_bundle. + * + * Fast path accesses the active bpf_mprog_entry within RCU critical section + * (in case of tcx it runs in NAPI which provides RCU protection there, + * other users might need explicit rcu_read_lock()). The bpf_mprog_commit() + * assumes that for the old bpf_mprog_entry there are no inflight users + * anymore. + * + * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for + * the replacement case where we don't swap the bpf_mprog_entry. + */ + +#define bpf_mprog_foreach_tuple(entry, fp, cp, t) \ + for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\ + ({ \ + t.prog = READ_ONCE(fp->prog); \ + t.link = cp->link; \ + t.prog; \ + }); \ + fp++, cp++) + +#define bpf_mprog_foreach_prog(entry, fp, p) \ + for (fp = &entry->fp_items[0]; \ + (p = READ_ONCE(fp->prog)); \ + fp++) + +#define BPF_MPROG_MAX 64 + +struct bpf_mprog_fp { + struct bpf_prog *prog; +}; + +struct bpf_mprog_cp { + struct bpf_link *link; +}; + +struct bpf_mprog_entry { + struct bpf_mprog_fp fp_items[BPF_MPROG_MAX]; + struct bpf_mprog_bundle *parent; +}; + +struct bpf_mprog_bundle { + struct bpf_mprog_entry a; + struct bpf_mprog_entry b; + struct bpf_mprog_cp cp_items[BPF_MPROG_MAX]; + struct bpf_prog *ref; + atomic64_t revision; + u32 count; +}; + +struct bpf_tuple { + struct bpf_prog *prog; + struct bpf_link *link; +}; + +static inline struct bpf_mprog_entry * +bpf_mprog_peer(const struct bpf_mprog_entry *entry) +{ + if (entry == &entry->parent->a) + return &entry->parent->b; + else + return &entry->parent->a; +} + +static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle) +{ + BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64)); + BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) != + ARRAY_SIZE(bundle->cp_items)); + + memset(bundle, 0, sizeof(*bundle)); + atomic64_set(&bundle->revision, 1); + bundle->a.parent = bundle; + bundle->b.parent = bundle; +} + +static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry) +{ + entry->parent->count++; +} + +static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry) +{ + entry->parent->count--; +} + +static inline int bpf_mprog_max(void) +{ + return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1; +} + +static inline int bpf_mprog_total(struct bpf_mprog_entry *entry) +{ + int total = entry->parent->count; + + WARN_ON_ONCE(total > bpf_mprog_max()); + return total; +} + +static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry, + struct bpf_prog *prog) +{ + const struct bpf_mprog_fp *fp; + const struct bpf_prog *tmp; + + bpf_mprog_foreach_prog(entry, fp, tmp) { + if (tmp == prog) + return true; + } + return false; +} + +static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry, + struct bpf_tuple *tuple) +{ + WARN_ON_ONCE(entry->parent->ref); + if (!tuple->link) + entry->parent->ref = tuple->prog; +} + +static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry) +{ + /* In the non-link case prog deletions can only drop the reference + * to the prog after the bpf_mprog_entry got swapped and the + * bpf_mprog ensured that there are no inflight users anymore. + * + * Paired with bpf_mprog_mark_for_release(). + */ + if (entry->parent->ref) { + bpf_prog_put(entry->parent->ref); + entry->parent->ref = NULL; + } +} + +static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry) +{ + atomic64_inc(&entry->parent->revision); +} + +static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry) +{ + bpf_mprog_complete_release(entry); + bpf_mprog_revision_new(entry); +} + +static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry) +{ + return atomic64_read(&entry->parent->revision); +} + +static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst, + struct bpf_mprog_entry *src) +{ + memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items)); +} + +static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst) +{ + memset(dst->fp_items, 0, sizeof(dst->fp_items)); +} + +static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new) +{ + struct bpf_mprog_entry *peer; + + peer = bpf_mprog_peer(entry); + bpf_mprog_entry_clear(peer); + peer->parent->count = 0; + *entry_new = peer; +} + +static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx) +{ + int total = bpf_mprog_total(entry); + + memmove(entry->fp_items + idx + 1, + entry->fp_items + idx, + (total - idx) * sizeof(struct bpf_mprog_fp)); + + memmove(entry->parent->cp_items + idx + 1, + entry->parent->cp_items + idx, + (total - idx) * sizeof(struct bpf_mprog_cp)); +} + +static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx) +{ + /* Total array size is needed in this case to enure the NULL + * entry is copied at the end. + */ + int total = ARRAY_SIZE(entry->fp_items); + + memmove(entry->fp_items + idx, + entry->fp_items + idx + 1, + (total - idx - 1) * sizeof(struct bpf_mprog_fp)); + + memmove(entry->parent->cp_items + idx, + entry->parent->cp_items + idx + 1, + (total - idx - 1) * sizeof(struct bpf_mprog_cp)); +} + +static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx, + struct bpf_mprog_fp **fp, + struct bpf_mprog_cp **cp) +{ + *fp = &entry->fp_items[idx]; + *cp = &entry->parent->cp_items[idx]; +} + +static inline void bpf_mprog_write(struct bpf_mprog_fp *fp, + struct bpf_mprog_cp *cp, + struct bpf_tuple *tuple) +{ + WRITE_ONCE(fp->prog, tuple->prog); + cp->link = tuple->link; +} + +int bpf_mprog_attach(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new, + struct bpf_prog *prog_new, struct bpf_link *link, + struct bpf_prog *prog_old, + u32 flags, u32 id_or_fd, u64 revision); + +int bpf_mprog_detach(struct bpf_mprog_entry *entry, + struct bpf_mprog_entry **entry_new, + struct bpf_prog *prog, struct bpf_link *link, + u32 flags, u32 id_or_fd, u64 revision); + +int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, + struct bpf_mprog_entry *entry); + +static inline bool bpf_mprog_supported(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SCHED_CLS: + return true; + default: + return false; + } +} +#endif /* __BPF_MPROG_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..9f2a6b83b49e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) @@ -142,9 +143,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) +BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) +BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f70f9ac884d2..7cb1b75eee38 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -275,6 +275,11 @@ struct bpf_reference_state { int callback_ref; }; +struct bpf_retval_range { + s32 minval; + s32 maxval; +}; + /* state of the program: * type of all registers and stack info */ @@ -297,23 +302,67 @@ struct bpf_func_state { * void foo(void) { bpf_timer_set_callback(,foo); } */ u32 async_entry_cnt; + struct bpf_retval_range callback_ret_range; bool in_callback_fn; - struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; struct bpf_reference_state *refs; - int allocated_stack; + /* The state of the stack. Each element of the array describes BPF_REG_SIZE + * (i.e. 8) bytes worth of stack memory. + * stack[0] represents bytes [*(r10-8)..*(r10-1)] + * stack[1] represents bytes [*(r10-16)..*(r10-9)] + * ... + * stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)] + */ struct bpf_stack_state *stack; + /* Size of the current stack, in bytes. The stack state is tracked below, in + * `stack`. allocated_stack is always a multiple of BPF_REG_SIZE. + */ + int allocated_stack; }; -struct bpf_idx_pair { - u32 prev_idx; +#define MAX_CALL_FRAMES 8 + +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +enum { + /* instruction references stack slot through PTR_TO_STACK register; + * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) + * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, + * 8 bytes per slot, so slot index (spi) is [0, 63]) + */ + INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ + + INSN_F_SPI_MASK = 0x3f, /* 6 bits */ + INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ + + INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ +}; + +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); + +struct bpf_jmp_history_entry { u32 idx; + /* insn idx can't be bigger than 1 million */ + u32 prev_idx : 22; + /* special flags, e.g., whether insn is doing register stack spill/load */ + u32 flags : 10; }; -#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -372,32 +421,49 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + bool used_as_loop_entry; /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; + /* If this state is a part of states loop this field points to some + * parent of this state such that: + * - it is also a member of the same states loop; + * - DFS states traversal starting from initial state visits loop_entry + * state before this state. + * Used to compute topmost loop entry for state loops. + * State loops might appear because of open coded iterators logic. + * See get_loop_entry() for more information. + */ + struct bpf_verifier_state *loop_entry; /* jmp history recorded from first to last. * backtracking is using it to go from last to first. * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_idx_pair *jmp_history; + struct bpf_jmp_history_entry *jmp_history; u32 jmp_history_cnt; + u32 dfs_depth; + u32 callback_unroll_depth; + u32 may_goto_depth; }; -#define bpf_get_spilled_reg(slot, frame) \ +#define bpf_get_spilled_reg(slot, frame, mask) \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ - (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \ ? &frame->stack[slot].spilled_ptr : NULL) /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ -#define bpf_for_each_spilled_reg(iter, frame, reg) \ - for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ +#define bpf_for_each_spilled_reg(iter, frame, reg, mask) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \ iter < frame->allocated_stack / BPF_REG_SIZE; \ - iter++, reg = bpf_get_spilled_reg(iter, frame)) + iter++, reg = bpf_get_spilled_reg(iter, frame, mask)) -/* Invoke __expr over regsiters in __vst, setting __state and __reg */ -#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ +#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \ ({ \ struct bpf_verifier_state *___vstate = __vst; \ int ___i, ___j; \ @@ -409,7 +475,7 @@ struct bpf_verifier_state { __reg = &___regs[___j]; \ (void)(__expr); \ } \ - bpf_for_each_spilled_reg(___j, __state, __reg) { \ + bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \ if (!__reg) \ continue; \ (void)(__expr); \ @@ -417,6 +483,10 @@ struct bpf_verifier_state { } \ }) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -478,8 +548,10 @@ struct bpf_insn_aux_data { u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -490,6 +562,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -532,15 +608,30 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 +struct bpf_subprog_arg_info { + enum bpf_arg_type arg_type; + union { + u32 mem_size; + u32 btf_id; + }; +}; + struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ - bool has_tail_call; - bool tail_call_reachable; - bool has_ld_abs; - bool is_async_cb; + u16 stack_extra; + bool has_tail_call: 1; + bool tail_call_reachable: 1; + bool has_ld_abs: 1; + bool is_cb: 1; + bool is_async_cb: 1; + bool is_exception_cb: 1; + bool args_cached: 1; + + u8 arg_cnt; + struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; struct bpf_verifier_env; @@ -575,10 +666,12 @@ struct bpf_verifier_env { u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; + struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; @@ -587,17 +680,24 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; + /* Allow access to uninitialized stack memory. Writes with fixed offset are + * always allowed, so this refers to reads (with fixed or variable offset), + * to writes with variable offset and to indirect (helper) accesses. + */ bool allow_uninit_stack; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; @@ -608,6 +708,7 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ @@ -642,6 +743,16 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; }; +static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) +{ + return &env->prog->aux->func_info_aux[subprog]; +} + +static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog) +{ + return &env->subprog_info[subprog]; +} + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, @@ -653,6 +764,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...); + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -675,14 +790,6 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); -int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type); -int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno, u32 mem_size); - /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, struct btf *btf, u32 btf_id) @@ -745,11 +852,92 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } -#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED | NON_OWN_REF) static inline bool bpf_type_has_unsafe_modifiers(u32 type) { return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; } +static inline bool type_is_ptr_alloc_obj(u32 type) +{ + return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; +} + +static inline bool type_is_non_owning_ref(u32 type) +{ + return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; +} + +static inline bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + type = base_type(type); + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + +static inline bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_XDP_SOCK; +} + +static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static inline bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || env->scratched_stack_slots; +} + +static inline void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); +const char *dynptr_type_str(enum bpf_dynptr_type type); +const char *iter_type_str(const struct btf *btf, u32 btf_id); +const char *iter_state_str(enum bpf_iter_state state); + +void print_verifier_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h deleted file mode 100644 index 736ded4905e0..000000000000 --- a/include/linux/bpfilter.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BPFILTER_H -#define _LINUX_BPFILTER_H - -#include <uapi/linux/bpfilter.h> -#include <linux/usermode_driver.h> -#include <linux/sockptr.h> - -struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen); -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen); - -struct bpfilter_umh_ops { - struct umd_info info; - /* since ip_getsockopt() can run in parallel, serialize access to umh */ - struct mutex lock; - int (*sockopt)(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set); - int (*start)(void); -}; -extern struct bpfilter_umh_ops bpfilter_ops; -#endif diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 5d732f48f787..1394ba302367 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 +#define PHY_ID_BCM5221 0x004061e0 #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 @@ -44,6 +45,7 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7346 0x600d8650 #define PHY_ID_BCM7362 0x600d84b0 +#define PHY_ID_BCM74165 0x359052c0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 @@ -330,6 +332,15 @@ #define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) +/* BCM5221 Registers */ +#define BCM5221_AEGSR 0x1C +#define BCM5221_AEGSR_MDIX_STATUS BIT(13) +#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12) +#define BCM5221_AEGSR_MDIX_DIS BIT(11) + +#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2) +#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ diff --git a/include/linux/btf.h b/include/linux/btf.h index cac9f304e27a..f9e56fd12a9f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -74,6 +74,7 @@ #define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ +#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -83,6 +84,17 @@ */ #define __bpf_kfunc __used noinline +#define __bpf_kfunc_start_defs() \ + __diag_push(); \ + __diag_ignore_all("-Wmissing-declarations", \ + "Global kfuncs as their definitions will be in BTF");\ + __diag_ignore_all("-Wmissing-prototypes", \ + "Global kfuncs as their definitions will be in BTF") + +#define __bpf_kfunc_end_defs() __diag_pop() +#define __bpf_hook_start() __bpf_kfunc_start_defs() +#define __bpf_hook_end() __bpf_kfunc_end_defs() + /* * Return the name of the passed struct, if exists, or halt the build if for * example the structure gets renamed. In this way, developers have to revisit @@ -125,6 +137,7 @@ struct btf_struct_metas { extern const struct file_operations btf_fops; +const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); @@ -204,13 +217,12 @@ u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); -int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); +s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, u32 id, u32 *res_id); const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, @@ -483,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix); +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no); + struct bpf_verifier_log; +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +struct bpf_struct_ops; +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops); +const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); +#else +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); @@ -501,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg); +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); @@ -544,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } -static inline const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static inline bool +btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { - return NULL; + return false; } static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 00950cc03bff..e24aabfe8ecc 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,9 @@ struct btf_id_set { u32 ids[]; }; +/* This flag implies BTF_SET8 holds kfunc(s) */ +#define BTF_SET8_KFUNCS (1 << 0) + struct btf_id_set8 { u32 cnt; u32 flags; @@ -21,6 +24,7 @@ struct btf_id_set8 { #include <linux/compiler.h> /* for __PASTE */ #include <linux/compiler_attributes.h> /* for __maybe_unused */ +#include <linux/stringify.h> /* * Following macros help to define lists of BTF IDs placed @@ -49,7 +53,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing @@ -183,17 +187,18 @@ extern struct btf_id_set name; * .word (1 << 3) | (1 << 1) | (1 << 2) * */ -#define __BTF_SET8_START(name, scope) \ +#define __BTF_SET8_START(name, scope, flags) \ +__BTF_ID_LIST(name, local) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ -".zero 8 \n" \ +".zero 4 \n" \ +".long " __stringify(flags) "\n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_SET8_START(name, local, 0) #define BTF_SET8_END(name) \ asm( \ @@ -202,6 +207,12 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set8 name; +#define BTF_KFUNCS_START(name) \ +__BTF_SET8_START(name, local, BTF_SET8_KFUNCS) + +#define BTF_KFUNCS_END(name) \ +BTF_SET8_END(name) + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; @@ -216,6 +227,8 @@ extern struct btf_id_set8 name; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #define BTF_SET8_END(name) +#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS }; +#define BTF_KFUNCS_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ @@ -267,5 +280,6 @@ MAX_BTF_TRACING_TYPE, extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; +extern u32 btf_bpf_map_id[]; #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..d78454a4dd1f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -16,8 +16,6 @@ #include <linux/wait.h> #include <linux/atomic.h> -#ifdef CONFIG_BLOCK - enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ @@ -173,7 +171,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) return test_bit_acquire(BH_Uptodate, &bh->b_state); } -#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) +static inline unsigned long bh_offset(const struct buffer_head *bh) +{ + return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); +} /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -194,29 +195,19 @@ void buffer_check_dirty_writeback(struct folio *folio, void mark_buffer_dirty(struct buffer_head *bh); void mark_buffer_write_io_error(struct buffer_head *bh); void touch_buffer(struct buffer_head *bh); -void set_bh_page(struct buffer_head *bh, - struct page *page, unsigned long offset); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); -bool try_to_free_buffers(struct folio *); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry); + gfp_t gfp); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); -void create_empty_buffers(struct page *, unsigned long, - unsigned long b_state); -void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, - unsigned long b_state); +struct buffer_head *create_empty_buffers(struct folio *folio, + unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); -void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int inode_has_buffers(struct inode *); -void invalidate_inode_buffers(struct inode *); -int remove_inode_buffers(struct inode *inode); -int sync_mapping_buffers(struct address_space *mapping); int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, bool datasync); int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, @@ -233,16 +224,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp); +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); -void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(void); -bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -258,18 +246,15 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); void __bh_read_batch(int nr, struct buffer_head *bhs[], blk_opf_t op_flags, bool force_lock); -extern int buffer_heads_over_limit; - /* * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length); -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); +int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, + void *get_block); int __block_write_full_folio(struct inode *inode, struct folio *folio, - get_block_t *get_block, struct writeback_control *wbc, - bh_end_io_t *handler); + get_block_t *get_block, struct writeback_control *wbc); int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, @@ -283,26 +268,13 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); -void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct page **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); -int block_commit_write(struct page *page, unsigned from, unsigned to); +void block_commit_write(struct page *page, unsigned int from, unsigned int to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); -/* Convert errno to return value from ->page_mkwrite() call */ -static inline vm_fault_t block_page_mkwrite_return(int err) -{ - if (err == 0) - return VM_FAULT_LOCKED; - if (err == -EFAULT || err == -EAGAIN) - return VM_FAULT_NOPAGE; - if (err == -ENOMEM) - return VM_FAULT_OOM; - /* -ENOSPC, -EDQUOT, -EIO ... */ - return VM_FAULT_SIGBUS; -} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); @@ -316,8 +288,6 @@ extern int buffer_migrate_folio_norefs(struct address_space *, #define buffer_migrate_folio_norefs NULL #endif -void buffer_init(void); - /* * inline definitions */ @@ -363,17 +333,38 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } -static inline struct buffer_head * -sb_getblk(struct super_block *sb, sector_t block) +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, unsigned size) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); } +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; -static inline struct buffer_head * -sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_MOVABLE | __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + +static inline struct buffer_head *sb_getblk(struct super_block *sb, + sector_t block) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); + return __getblk(sb->s_bdev, block, sb->s_blocksize); +} + +static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, + sector_t block, gfp_t gfp) +{ + return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * @@ -410,20 +401,6 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } -static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, 0); -} - -static inline struct buffer_head *__getblk(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); -} - static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) { if (!buffer_uptodate(bh) && trylock_buffer(bh)) { @@ -475,9 +452,44 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } +/** + * get_nth_bh - Get a reference on the n'th buffer after this one. + * @bh: The buffer to start counting from. + * @count: How many buffers to skip. + * + * This is primarily useful for finding the nth buffer in a folio; in + * that case you pass the head buffer and the byte offset in the folio + * divided by the block size. It can be used for other purposes, but + * it will wrap at the end of the folio rather than returning NULL or + * proceeding to the next folio for you. + * + * Return: The requested buffer with an elevated refcount. + */ +static inline __must_check +struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count) +{ + while (count--) + bh = bh->b_this_page; + get_bh(bh); + return bh; +} + bool block_dirty_folio(struct address_space *mapping, struct folio *folio); -#else /* CONFIG_BLOCK */ +#ifdef CONFIG_BUFFER_HEAD + +void buffer_init(void); +bool try_to_free_buffers(struct folio *folio); +int inode_has_buffers(struct inode *inode); +void invalidate_inode_buffers(struct inode *inode); +int remove_inode_buffers(struct inode *inode); +int sync_mapping_buffers(struct address_space *mapping); +void invalidate_bh_lrus(void); +void invalidate_bh_lrus_cpu(void); +bool has_bh_in_lru(int cpu, void *dummy); +extern int buffer_heads_over_limit; + +#else /* CONFIG_BUFFER_HEAD */ static inline void buffer_init(void) {} static inline bool try_to_free_buffers(struct folio *folio) { return true; } @@ -485,9 +497,10 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bh_lrus(void) {} static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 -#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BUFFER_HEAD */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 3b7a0ff4642f..20aa3c2d89f7 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -2,15 +2,16 @@ #ifndef _LINUX_BUILDID_H #define _LINUX_BUILDID_H -#include <linux/mm_types.h> +#include <linux/types.h> #define BUILD_ID_SIZE_MAX 20 +struct vm_area_struct; int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); #else diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 555aae5448ae..bd1e361b351c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -83,7 +83,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/cache.h b/include/linux/cache.h index 9900d20b76c2..0ecb17bb6883 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -85,6 +85,31 @@ #define cache_line_size() L1_CACHE_BYTES #endif +#ifndef __cacheline_group_begin +#define __cacheline_group_begin(GROUP) \ + __u8 __cacheline_group_begin__##GROUP[0] +#endif + +#ifndef __cacheline_group_end +#define __cacheline_group_end(GROUP) \ + __u8 __cacheline_group_end__##GROUP[0] +#endif + +#ifndef CACHELINE_ASSERT_GROUP_MEMBER +#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \ + BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) && \ + offsetofend(TYPE, MEMBER) <= \ + offsetof(TYPE, __cacheline_group_end__##GROUP))) +#endif + +#ifndef CACHELINE_ASSERT_GROUP_SIZE +#define CACHELINE_ASSERT_GROUP_SIZE(TYPE, GROUP, SIZE) \ + BUILD_BUG_ON(offsetof(TYPE, __cacheline_group_end__##GROUP) - \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) > \ + SIZE) +#endif + /* * Helper to add padding within a struct to ensure data fall into separate * cachelines. diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h index a6189d21f2ba..55f297b2c23f 100644 --- a/include/linux/cacheflush.h +++ b/include/linux/cacheflush.h @@ -7,14 +7,23 @@ struct folio; #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +#ifndef flush_dcache_folio void flush_dcache_folio(struct folio *folio); #endif #else static inline void flush_dcache_folio(struct folio *folio) { } -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0 +#define flush_dcache_folio flush_dcache_folio #endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ +#ifndef flush_icache_pages +static inline void flush_icache_pages(struct vm_area_struct *vma, + struct page *page, unsigned int nr) +{ +} +#endif + +#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1) + #endif /* _LINUX_CACHEFLUSH_H */ diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index a5cfd44fab45..2cb15fe4fe12 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -73,6 +73,7 @@ struct cacheinfo { struct cpu_cacheinfo { struct cacheinfo *info_list; + unsigned int per_cpu_data_slice_size; unsigned int num_levels; unsigned int num_leaves; bool cpu_map_populated; @@ -137,4 +138,10 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level) #define use_arch_cache_info() (false) #endif +#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING +#define cpu_dcache_is_aliasing() false +#else +#include <asm/cachetype.h> +#endif + #endif /* _LINUX_CACHEINFO_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 982ba245eb41..1b92aed49363 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index c205c51d79c9..d29bb4521947 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -3,7 +3,7 @@ * linux/can/rx-offload.h * * Copyright (c) 2014 David Jander, Protonic Holland - * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> + * Copyright (c) 2014-2017, 2023 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> */ #ifndef _CAN_RX_OFFLOAD_H @@ -44,11 +44,14 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_queue_timestamp(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); -unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, - unsigned int idx, u32 timestamp, - unsigned int *frame_len_ptr); +unsigned int can_rx_offload_get_echo_skb_queue_timestamp(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp, + unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); +unsigned int can_rx_offload_get_echo_skb_queue_tail(struct can_rx_offload *offload, + unsigned int idx, + unsigned int *frame_len_ptr); void can_rx_offload_irq_finish(struct can_rx_offload *offload); void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index cb0d6cd1c12f..60693a145894 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -90,6 +90,14 @@ enum cc_attr { * Examples include TDX Guest. */ CC_ATTR_HOTPLUG_DISABLED, + + /** + * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. + * + * The host kernel is running with the necessary features + * enabled to run SEV-SNP guests. + */ + CC_ATTR_HOST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -107,10 +115,14 @@ enum cc_attr { * * FALSE - Specified Confidential Computing attribute is not active */ bool cc_platform_has(enum cc_attr attr); +void cc_platform_set(enum cc_attr attr); +void cc_platform_clear(enum cc_attr attr); #else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ static inline bool cc_platform_has(enum cc_attr attr) { return false; } +static inline void cc_platform_set(enum cc_attr attr) { } +static inline void cc_platform_clear(enum cc_attr attr) { } #endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index bead71b7bc73..b57118aaa679 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -12,6 +12,7 @@ #include <linux/device.h> #include <linux/list.h> #include <linux/mod_devicetable.h> +#include <linux/msi.h> #define MAX_CDX_DEV_RESOURCES 4 #define CDX_CONTROLLER_ID_SHIFT 4 @@ -21,13 +22,31 @@ struct cdx_controller; enum { + CDX_DEV_MSI_CONF, + CDX_DEV_BUS_MASTER_CONF, CDX_DEV_RESET_CONF, + CDX_DEV_MSI_ENABLE, +}; + +struct cdx_msi_config { + u64 addr; + u32 data; + u16 msi_index; }; struct cdx_device_config { u8 type; + union { + struct cdx_msi_config msi; + bool bus_master_enable; + bool msi_enable; + }; }; +typedef int (*cdx_bus_enable_cb)(struct cdx_controller *cdx, u8 bus_num); + +typedef int (*cdx_bus_disable_cb)(struct cdx_controller *cdx, u8 bus_num); + typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, @@ -35,6 +54,19 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, struct cdx_device_config *dev_config); /** + * CDX_DEVICE - macro used to describe a specific CDX device + * @vend: the 16 bit CDX Vendor ID + * @dev: the 16 bit CDX Device ID + * + * This macro is used to create a struct cdx_device_id that matches a + * specific device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. + */ +#define CDX_DEVICE(vend, dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = CDX_ANY_ID, .subdevice = CDX_ANY_ID + +/** * CDX_DEVICE_DRIVER_OVERRIDE - macro used to describe a CDX device with * override_only flags. * @vend: the 16 bit CDX Vendor ID @@ -42,18 +74,24 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx, * @driver_override: the 32 bit CDX Device override_only * * This macro is used to create a struct cdx_device_id that matches only a - * driver_override device. + * driver_override device. The subvendor and subdevice fields will be set to + * CDX_ANY_ID. */ #define CDX_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \ - .vendor = (vend), .device = (dev), .override_only = (driver_override) + .vendor = (vend), .device = (dev), .subvendor = CDX_ANY_ID,\ + .subdevice = CDX_ANY_ID, .override_only = (driver_override) /** * struct cdx_ops - Callbacks supported by CDX controller. + * @bus_enable: enable bus on the controller + * @bus_disable: disable bus on the controller * @scan: scan the devices on the controller * @dev_configure: configuration like reset, master_enable, * msi_config etc for a CDX device */ struct cdx_ops { + cdx_bus_enable_cb bus_enable; + cdx_bus_disable_cb bus_disable; cdx_scan_cb scan; cdx_dev_configure_cb dev_configure; }; @@ -62,13 +100,17 @@ struct cdx_ops { * struct cdx_controller: CDX controller object * @dev: Linux device associated with the CDX controller. * @priv: private data + * @msi_domain: MSI domain * @id: Controller ID + * @controller_registered: controller registered with bus * @ops: CDX controller ops */ struct cdx_controller { struct device *dev; void *priv; + struct irq_domain *msi_domain; u32 id; + bool controller_registered; struct cdx_ops *ops; }; @@ -78,36 +120,68 @@ struct cdx_controller { * @cdx: CDX controller associated with the device * @vendor: Vendor ID for CDX device * @device: Device ID for CDX device + * @subsystem_vendor: Subsystem Vendor ID for CDX device + * @subsystem_device: Subsystem Device ID for CDX device + * @class: Class for the CDX device + * @revision: Revision of the CDX device * @bus_num: Bus number for this CDX device * @dev_num: Device number for this device * @res: array of MMIO region entries * @res_attr: resource binary attribute + * @debugfs_dir: debugfs directory for this device * @res_count: number of valid MMIO regions * @dma_mask: Default DMA mask * @flags: CDX device flags * @req_id: Requestor ID associated with CDX device + * @is_bus: Is this bus device + * @enabled: is this bus enabled + * @msi_dev_id: MSI Device ID associated with CDX device + * @num_msi: Number of MSI's supported by the device * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. + * @irqchip_lock: lock to synchronize irq/msi configuration + * @msi_write_pending: MSI write pending for this device */ struct cdx_device { struct device dev; struct cdx_controller *cdx; u16 vendor; u16 device; + u16 subsystem_vendor; + u16 subsystem_device; + u32 class; + u8 revision; u8 bus_num; u8 dev_num; struct resource res[MAX_CDX_DEV_RESOURCES]; + struct bin_attribute *res_attr[MAX_CDX_DEV_RESOURCES]; + struct dentry *debugfs_dir; u8 res_count; u64 dma_mask; u16 flags; u32 req_id; + bool is_bus; + bool enabled; + u32 msi_dev_id; + u32 num_msi; const char *driver_override; + struct mutex irqchip_lock; + bool msi_write_pending; }; #define to_cdx_device(_dev) \ container_of(_dev, struct cdx_device, dev) +#define cdx_resource_start(dev, num) ((dev)->res[(num)].start) +#define cdx_resource_end(dev, num) ((dev)->res[(num)].end) +#define cdx_resource_flags(dev, num) ((dev)->res[(num)].flags) +#define cdx_resource_len(dev, num) \ + ((cdx_resource_start((dev), (num)) == 0 && \ + cdx_resource_end((dev), (num)) == \ + cdx_resource_start((dev), (num))) ? 0 : \ + (cdx_resource_end((dev), (num)) - \ + cdx_resource_start((dev), (num)) + 1)) /** * struct cdx_driver - CDX device driver * @driver: Generic device driver @@ -170,4 +244,48 @@ extern struct bus_type cdx_bus_type; */ int cdx_dev_reset(struct device *dev); +/** + * cdx_set_master - enables bus-mastering for CDX device + * @cdx_dev: the CDX device to enable + * + * Return: 0 for success, -errno on failure + */ +int cdx_set_master(struct cdx_device *cdx_dev); + +/** + * cdx_clear_master - disables bus-mastering for CDX device + * @cdx_dev: the CDX device to disable + * + * Return: 0 for success, -errno on failure + */ +int cdx_clear_master(struct cdx_device *cdx_dev); + +#ifdef CONFIG_GENERIC_MSI_IRQ +/** + * cdx_enable_msi - Enable MSI for the CDX device. + * @cdx_dev: device pointer + * + * Return: 0 for success, -errno on failure + */ +int cdx_enable_msi(struct cdx_device *cdx_dev); + +/** + * cdx_disable_msi - Disable MSI for the CDX device. + * @cdx_dev: device pointer + */ +void cdx_disable_msi(struct cdx_device *cdx_dev); + +#else /* CONFIG_GENERIC_MSI_IRQ */ + +static inline int cdx_enable_msi(struct cdx_device *cdx_dev) +{ + return -ENODEV; +} + +static inline void cdx_disable_msi(struct cdx_device *cdx_dev) +{ +} + +#endif /* CONFIG_GENERIC_MSI_IRQ */ + #endif /* _CDX_BUS_H_ */ diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) \ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 49586ff26152..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,16 +357,26 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; -extern const char *ceph_mds_op_name(int op); +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) +extern const char *ceph_mds_op_name(int op); -#define CEPH_SETATTR_MODE 1 -#define CEPH_SETATTR_UID 2 -#define CEPH_SETATTR_GID 4 -#define CEPH_SETATTR_MTIME 8 -#define CEPH_SETATTR_ATIME 16 -#define CEPH_SETATTR_SIZE 32 -#define CEPH_SETATTR_CTIME 64 +#define CEPH_SETATTR_MODE (1 << 0) +#define CEPH_SETATTR_UID (1 << 1) +#define CEPH_SETATTR_GID (1 << 2) +#define CEPH_SETATTR_MTIME (1 << 3) +#define CEPH_SETATTR_ATIME (1 << 4) +#define CEPH_SETATTR_SIZE (1 << 5) +#define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_MTIME_NOW (1 << 7) +#define CEPH_SETATTR_ATIME_NOW (1 << 8) +#define CEPH_SETATTR_BTIME (1 << 9) +#define CEPH_SETATTR_KILL_SGUID (1 << 10) +#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11) +#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12) /* * Ceph setxattr request flags. @@ -479,7 +489,7 @@ union ceph_mds_request_args_ext { #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head_old { +struct ceph_mds_request_head_legacy { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -492,9 +502,9 @@ struct ceph_mds_request_head_old { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 1 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le16 version; /* struct version */ __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ @@ -508,6 +518,26 @@ struct ceph_mds_request_head { union ceph_mds_request_args_ext args; } __attribute__ ((packed)); +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; + + __le32 ext_num_retry; /* new count retry attempts */ + __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h deleted file mode 100644 index 4c3e0648dc27..000000000000 --- a/include/linux/ceph/mdsmap.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FS_CEPH_MDSMAP_H -#define _FS_CEPH_MDSMAP_H - -#include <linux/bug.h> -#include <linux/ceph/types.h> - -/* - * mds map - describe servers in the mds cluster. - * - * we limit fields to those the client actually xcares about - */ -struct ceph_mds_info { - u64 global_id; - struct ceph_entity_addr addr; - s32 state; - int num_export_targets; - bool laggy; - u32 *export_targets; -}; - -struct ceph_mdsmap { - u32 m_epoch, m_client_epoch, m_last_failure; - u32 m_root; - u32 m_session_timeout; /* seconds */ - u32 m_session_autoclose; /* seconds */ - u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ - u32 m_max_mds; /* expected up:active mds number */ - u32 m_num_active_mds; /* actual up:active mds number */ - u32 possible_max_rank; /* possible max rank index */ - struct ceph_mds_info *m_info; - - /* which object pools file data can be stored in */ - int m_num_data_pg_pools; - u64 *m_data_pg_pools; - u64 m_cas_pg_pool; - - bool m_enabled; - bool m_damaged; - int m_num_laggy; -}; - -static inline struct ceph_entity_addr * -ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) -{ - if (w >= m->possible_max_rank) - return NULL; - return &m->m_info[w].addr; -} - -static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) -{ - BUG_ON(w < 0); - if (w >= m->possible_max_rank) - return CEPH_MDS_STATE_DNE; - return m->m_info[w].state; -} - -static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) -{ - if (w >= 0 && w < m->possible_max_rank) - return m->m_info[w].laggy; - return false; -} - -extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); -extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); -extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); - -#endif diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 99c1726be6ee..1717cc57cdac 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -17,6 +17,7 @@ struct ceph_msg; struct ceph_connection; +struct ceph_msg_data_cursor; /* * Ceph defines these callbacks for handling connection events. @@ -70,6 +71,30 @@ struct ceph_connection_operations { int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt); + + /** + * sparse_read: read sparse data + * @con: connection we're reading from + * @cursor: data cursor for reading extents + * @buf: optional buffer to read into + * + * This should be called more than once, each time setting up to + * receive an extent into the current cursor position, and zeroing + * the holes between them. + * + * Returns amount of data to be read (in bytes), 0 if reading is + * complete, or -errno if there was an error. + * + * If @buf is set on a >0 return, then the data should be read into + * the provided buffer. Otherwise, it should be read into the cursor. + * + * The sparse read operation is expected to initialize the cursor + * with a length covering up to the end of the last extent. + */ + int (*sparse_read)(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **buf); + }; /* use format string %s%lld */ @@ -98,6 +123,7 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ + CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */ }; #ifdef CONFIG_BLOCK @@ -199,6 +225,7 @@ struct ceph_msg_data { bool own_pages; }; struct ceph_pagelist *pagelist; + struct iov_iter iter; }; }; @@ -207,6 +234,7 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ + int sr_resid; /* residual sparse_read len */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -222,6 +250,10 @@ struct ceph_msg_data_cursor { struct page *page; /* page from list */ size_t offset; /* bytes from list */ }; + struct { + struct iov_iter iov_iter; + unsigned int lastlen; + }; }; }; @@ -251,6 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; @@ -309,6 +342,10 @@ struct ceph_connection_v1_info { int in_base_pos; /* bytes read */ + /* sparse reads */ + struct kvec in_sr_kvec; /* current location to receive into */ + u64 in_sr_len; /* amount of data in this extent */ + /* message in temps */ u8 in_tag; /* protocol control byte */ struct ceph_msg_header in_hdr; @@ -395,6 +432,7 @@ struct ceph_connection_v2_info { void *conn_bufs[16]; int conn_buf_cnt; + int data_len_remain; struct kvec in_sign_kvecs[8]; struct kvec out_sign_kvecs[8]; @@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, #endif /* CONFIG_BLOCK */ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter); struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fb6be72104df..f66f6aac74f6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,14 +29,63 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 -/* a given osd we're communicating with */ +/* + * A single extent in a SPARSE_READ reply. + * + * Note that these come from the OSD as little-endian values. On BE arches, + * we convert them in-place after receipt. + */ +struct ceph_sparse_extent { + u64 off; + u64 len; +} __packed; + +/* Sparse read state machine state values */ +enum ceph_sparse_read_state { + CEPH_SPARSE_READ_HDR = 0, + CEPH_SPARSE_READ_EXTENTS, + CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, + CEPH_SPARSE_READ_DATA, +}; + +/* + * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of + * 64-bit offset/length pairs, and then all of the actual file data + * concatenated after it (sans holes). + * + * Unfortunately, we don't know how long the extent array is until we've + * started reading the data section of the reply. The caller should send down + * a destination buffer for the array, but we'll alloc one if it's too small + * or if the caller doesn't. + */ +struct ceph_sparse_read { + enum ceph_sparse_read_state sr_state; /* state machine state */ + u64 sr_req_off; /* orig request offset */ + u64 sr_req_len; /* orig request length */ + u64 sr_pos; /* current pos in buffer */ + int sr_index; /* current extent index */ + u32 sr_datalen; /* length of actual data */ + u32 sr_count; /* extent count in reply */ + int sr_ext_len; /* length of extent array */ + struct ceph_sparse_extent *sr_extent; /* extent array */ +}; + +/* + * A given osd we're communicating with. + * + * Note that the o_requests tree can be searched while holding the "lock" mutex + * or the "o_requests_lock" spinlock. Insertion or removal requires both! + */ struct ceph_osd { refcount_t o_ref; + int o_sparse_op_idx; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; + spinlock_t o_requests_lock; struct rb_root o_requests; struct rb_root o_linger_requests; struct rb_root o_backoff_mappings; @@ -46,6 +95,7 @@ struct ceph_osd { unsigned long lru_ttl; struct list_head o_keepalive_item; struct mutex lock; + struct ceph_sparse_read o_sparse_read; }; #define CEPH_OSD_SLAB_OPS 2 @@ -59,6 +109,7 @@ enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ CEPH_OSD_DATA_TYPE_BVECS, + CEPH_OSD_DATA_TYPE_ITER, }; struct ceph_osd_data { @@ -82,6 +133,7 @@ struct ceph_osd_data { struct ceph_bvec_iter bvec_pos; u32 num_bvecs; }; + struct iov_iter iter; }; }; @@ -98,6 +150,8 @@ struct ceph_osd_req_op { u64 offset, length; u64 truncate_size; u32 truncate_seq; + int sparse_ext_cnt; + struct ceph_sparse_extent *sparse_ext; struct ceph_osd_data osd_data; } extent; struct { @@ -145,6 +199,9 @@ struct ceph_osd_req_op { u32 src_fadvise_flags; struct ceph_osd_data osd_data; } copy_from; + struct { + u64 ver; + } assert_ver; }; }; @@ -199,6 +256,7 @@ struct ceph_osd_request { struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; + bool r_linger; /* don't resend on failure */ struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; @@ -211,9 +269,9 @@ struct ceph_osd_request { struct ceph_snap_context *r_snapc; /* for writes */ struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ - bool r_linger; /* don't resend on failure */ /* internal */ + u64 r_version; /* data version sent in reply */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ ktime_t r_start_latency; /* ktime_t */ @@ -221,7 +279,7 @@ struct ceph_osd_request { int r_attempts; u32 r_map_dne_bound; - struct ceph_osd_req_op r_ops[]; + struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops); }; struct ceph_request_redirect { @@ -450,6 +508,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, @@ -504,6 +564,23 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); + +/* + * How big an extent array should we preallocate for a sparse read? This is + * just a starting value. If we get more than this back from the OSD, the + * receiver will reallocate. + */ +#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 + +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) +{ + if (!cnt) + cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL; + + return __ceph_alloc_sparse_ext_map(op, cnt); +} + extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -558,5 +635,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, struct ceph_watch_item **watchers, u32 *num_watchers); -#endif +/* Find offset into the buffer of the end of the extent map */ +static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) +{ + struct ceph_sparse_extent *ext; + + /* No extents? No data */ + if (op->extent.sparse_ext_cnt == 0) + return 0; + + ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; + + return ext->off + ext->len - op->extent.offset; +} + +#endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 43a7a1573b51..73c3efbec36c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -524,6 +524,10 @@ struct ceph_osd_op { __le64 cookie; } __attribute__ ((packed)) notify; struct { + __le64 unused; + __le64 ver; + } __attribute__ ((packed)) assert_ver; + struct { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 5e134f4ce8b7..f0df518e11dd 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -9,6 +9,14 @@ #include <linux/bug.h> #include <linux/module.h> +#include <asm/cfi.h> + +#ifndef cfi_get_offset +static inline int cfi_get_offset(void) +{ + return 0; +} +#endif #ifdef CONFIG_CFI_CLANG enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -19,11 +27,13 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, { return report_cfi_failure(regs, addr, NULL, 0); } +#endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS bool is_cfi_trap(unsigned long addr); +#else +static inline bool is_cfi_trap(unsigned long addr) { return false; } #endif -#endif /* CONFIG_CFI_CLANG */ #ifdef CONFIG_MODULES #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -36,4 +46,8 @@ static inline void module_cfi_finalize(const Elf_Ehdr *hdr, #endif /* CONFIG_ARCH_USES_CFI_TRAPS */ #endif /* CONFIG_MODULES */ +#ifndef CFI_NOSEAL +#define CFI_NOSEAL(x) +#endif + #endif /* _LINUX_CFI_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a0d5466c7be..ea48c861cd36 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -115,6 +115,11 @@ enum { * Enable recursive subtree protection */ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), + + /* + * Enable hugetlb accounting for the memory controller. + */ + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), }; /* cftype->flags */ @@ -238,7 +243,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; @@ -342,6 +347,20 @@ struct cgroup_rstat_cpu { struct cgroup_base_stat last_bstat; /* + * This field is used to record the cumulative per-cpu time of + * the cgroup and its descendants. Currently it can be read via + * eBPF/drgn etc, and we are still trying to determine how to + * expose it in the cgroupfs interface. + */ + struct cgroup_base_stat subtree_bstat; + + /* + * Snapshots at the last reading. These are used to calculate the + * deltas to propagate to the per-cpu subtree_bstat. + */ + struct cgroup_base_stat last_subtree_bstat; + + /* * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. @@ -477,6 +496,20 @@ struct cgroup { struct cgroup_rstat_cpu __percpu *rstat_cpu; struct list_head rstat_css_list; + /* + * Add padding to separate the read mostly rstat_cpu and + * rstat_css_list into a different cacheline from the following + * rstat_flush_next and *bstat fields which can have frequent updates. + */ + CACHELINE_PADDING(_pad_); + + /* + * A singly-linked list of cgroup structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + */ + struct cgroup *rstat_flush_next; + /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; @@ -529,6 +562,10 @@ struct cgroup_root { /* Unique id for this hierarchy. */ int hierarchy_id; + /* A list running through the active hierarchies */ + struct list_head root_list; + struct rcu_head rcu; /* Must be near the top */ + /* * The root cgroup. The containing cgroup_root will be destroyed on its * release. cgrp->ancestors[0] will be used overflowing into the @@ -542,9 +579,6 @@ struct cgroup_root { /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; - /* A list running through the active hierarchies */ - struct list_head root_list; - /* Hierarchy-specific flags */ unsigned int flags; @@ -661,6 +695,8 @@ struct cgroup_subsys { void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); + int (*css_local_stat_show)(struct seq_file *seq, + struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b307013b9c6c..34aaf0e87def 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -40,13 +40,11 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 -/* walk only threadgroup leaders */ -#define CSS_TASK_ITER_PROCS (1U << 0) -/* walk all threaded css_sets in the domain */ -#define CSS_TASK_ITER_THREADED (1U << 1) - -/* internal flags */ -#define CSS_TASK_ITER_SKIPPED (1U << 16) +enum { + CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ + CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ +}; /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { @@ -71,6 +69,7 @@ struct css_task_iter { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -388,7 +387,6 @@ static inline void cgroup_unlock(void) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ @@ -855,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..c2d09bc4f976 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) @@ -92,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * trivial wrapper around DEFINE_CLASS() above specifically * for locks. * + * DEFINE_GUARD_COND(name, ext, condlock) + * wrapper around EXTEND_CLASS above to add conditional lock + * variants to a base class, eg. mutex_trylock() or + * mutex_lock_interruptible(). + * * guard(name): - * an anonymous instance of the (guard) class + * an anonymous instance of the (guard) class, not recommended for + * conditional locks. * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is * bound to the next (compound) statement. * + * for conditional locks the loop body is skipped when the lock is not + * acquired. + * + * scoped_cond_guard (name, fail, args...) { }: + * similar to scoped_guard(), except it does fail when the lock + * acquire fails. + * */ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return *_T; } + +#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + class_##_name##_t _T) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) +#define __guard_ptr(_name) class_##_name##_lock_ptr + #define scoped_guard(_name, args...) \ for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) + +#define scoped_cond_guard(_name, _fail, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) \ + if (!__guard_ptr(_name)(&scope)) _fail; \ + else /* * Additional helper macros for generating lock guards with types, either for @@ -119,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) * * will result in the following type: * @@ -140,6 +204,11 @@ typedef struct { \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ if (_T->lock) { _unlock; } \ +} \ + \ +static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ +{ \ + return _T->lock; \ } @@ -168,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) +#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ + if (_T->lock && !(_condlock)) _T->lock = NULL; \ + _t; }), \ + typeof_member(class_##_name##_t, lock) l) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } + + #endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0f0cd01906b4..4a537260f655 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core, unsigned long parent_rate); /** - * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * struct clk_duty - Structure encoding the duty cycle ratio of a clock * * @num: Numerator of the duty cycle ratio * @den: Denominator of the duty cycle ratio @@ -129,7 +129,7 @@ struct clk_duty { * @restore_context: Restore the context of the clock after a restoration * of power. * - * @recalc_rate Recalculate the rate of this clock, by querying hardware. The + * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. If the * driver cannot figure out a rate for this clock, it must return @@ -448,15 +448,15 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, */ #define clk_hw_register_fixed_rate_with_accuracy_parent_hw(dev, name, \ parent_hw, flags, fixed_rate, fixed_accuracy) \ - __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw) \ - NULL, NULL, (flags), (fixed_rate), \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \ + NULL, (flags), (fixed_rate), \ (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate * clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock accuracy @@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ @@ -649,7 +649,7 @@ struct clk_div_table { * Clock with an adjustable divider affecting its output frequency. Implements * .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered @@ -1084,18 +1084,28 @@ void of_fixed_factor_clk_setup(struct device_node *node); * @hw: handle between common and hardware-specific interfaces * @mult: multiplier * @div: divider + * @acc: fixed accuracy in ppb + * @flags: behavior modifying flags * * Clock with a fixed multiplier and divider. The output frequency is the * parent clock rate divided by div and multiplied by mult. - * Implements .recalc_rate, .set_rate and .round_rate + * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy + * + * Flags: + * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the + * parent clk accuracy. */ struct clk_fixed_factor { struct clk_hw hw; unsigned int mult; unsigned int div; + unsigned long acc; + unsigned int flags; }; +#define CLK_FIXED_FACTOR_FIXED_ACCURACY BIT(0) + #define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) extern const struct clk_ops clk_fixed_factor_ops; @@ -1106,10 +1116,24 @@ void clk_unregister_fixed_factor(struct clk *clk); struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev, const char *name, unsigned int index, unsigned long flags, unsigned int mult, unsigned int div); @@ -1130,11 +1154,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, * @mwidth: width of the numerator bit field * @nshift: shift to the denominator bit field * @nwidth: width of the denominator bit field + * @approximation: clk driver's callback for calculating the divider clock * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. * - * Flags: + * @flags: * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED * is set then the numerator and denominator are both the value read @@ -1191,7 +1216,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw); * Clock with an adjustable multiplier affecting its output frequency. * Implements .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read * from the register, with 0 being a valid value effectively * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is @@ -1379,7 +1404,7 @@ struct clk_onecell_data { struct clk_hw_onecell_data { unsigned int num; - struct clk_hw *hws[]; + struct clk_hw *hws[] __counted_by(num); }; #define CLK_OF_DECLARE(name, compat, fn) \ diff --git a/include/linux/clk.h b/include/linux/clk.h index 06f1b292f8a0..0fa56d672532 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -202,6 +202,18 @@ bool clk_is_match(const struct clk *p, const struct clk *q); int clk_rate_exclusive_get(struct clk *clk); /** + * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get + * @dev: device the exclusivity is bound to + * @clk: clock source + * + * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler + * on @dev to call clk_rate_exclusive_put(). + * + * Must not be called from within atomic context. + */ +int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk); + +/** * clk_rate_exclusive_put - release exclusivity over the rate control of a * producer * @clk: clock source @@ -274,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk) return 0; } +static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk) +{ + return 0; +} + static inline void clk_rate_exclusive_put(struct clk *clk) {} #endif @@ -479,6 +496,22 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** + * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed) + * @dev: device for clock "consumer" + * @clks: pointer to the clk_bulk_data table of consumer + * + * Returns success (0) or negative errno. + * + * This helper function allows drivers to get all clocks of the + * consumer and enables them in one operation with management. + * The clks will automatically be disabled and freed when the device + * is unbound. + */ + +int __must_check devm_clk_bulk_get_all_enable(struct device *dev, + struct clk_bulk_data **clks); + +/** * devm_clk_get - lookup and obtain a managed reference to a clock producer. * @dev: device for clock "consumer" * @id: clock consumer ID @@ -968,6 +1001,12 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, return 0; } +static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev, + struct clk_bulk_data **clks) +{ + return 0; +} + static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { diff --git a/include/linux/clk/mmp.h b/include/linux/clk/mmp.h deleted file mode 100644 index 445130460380..000000000000 --- a/include/linux/clk/mmp.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __CLK_MMP_H -#define __CLK_MMP_H - -#include <linux/types.h> - -extern void pxa168_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys); -extern void pxa910_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys, - phys_addr_t apbcp_phys); -extern void mmp2_clk_init(phys_addr_t mpmu_phys, - phys_addr_t apmu_phys, - phys_addr_t apbc_phys); - -#endif diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index cbfcbf186ce3..e656f63efdce 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -13,11 +13,14 @@ /** * struct clk_omap_reg - OMAP register declaration * @offset: offset from the master IP module base address + * @bit: register bit offset * @index: index of the master IP module + * @flags: flags */ struct clk_omap_reg { void __iomem *ptr; u16 offset; + u8 bit; u8 index; u8 flags; }; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..0ad8b550bb4b 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,7 +291,19 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) -extern ulong max_cswd_read_retries; +static inline unsigned int clocksource_get_max_watchdog_retry(void) +{ + /* + * When system is in the boot phase or under heavy workload, there + * can be random big latencies during the clocksource/watchdog + * read, so allow retries to filter the noise latency. As the + * latency's frequency and maximum value goes up with the number of + * CPUs, scale the number of retries with the number of online + * CPUs. + */ + return (ilog2(num_online_cpus()) / 2) + 1; +} + void clocksource_verify_percpu(struct clocksource *cs); #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 16775d7d8f8d..a4fa3436940c 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -6,6 +6,9 @@ enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER, + CSID_X86_TSC_EARLY, + CSID_X86_TSC, + CSID_X86_KVM_CLK, CSID_MAX, }; diff --git a/include/linux/closure.h b/include/linux/closure.h new file mode 100644 index 000000000000..c554c6a08768 --- /dev/null +++ b/include/linux/closure.h @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOSURE_H +#define _LINUX_CLOSURE_H + +#include <linux/llist.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/workqueue.h> + +/* + * Closure is perhaps the most overused and abused term in computer science, but + * since I've been unable to come up with anything better you're stuck with it + * again. + * + * What are closures? + * + * They embed a refcount. The basic idea is they count "things that are in + * progress" - in flight bios, some other thread that's doing something else - + * anything you might want to wait on. + * + * The refcount may be manipulated with closure_get() and closure_put(). + * closure_put() is where many of the interesting things happen, when it causes + * the refcount to go to 0. + * + * Closures can be used to wait on things both synchronously and asynchronously, + * and synchronous and asynchronous use can be mixed without restriction. To + * wait synchronously, use closure_sync() - you will sleep until your closure's + * refcount hits 1. + * + * To wait asynchronously, use + * continue_at(cl, next_function, workqueue); + * + * passing it, as you might expect, the function to run when nothing is pending + * and the workqueue to run that function out of. + * + * continue_at() also, critically, requires a 'return' immediately following the + * location where this macro is referenced, to return to the calling function. + * There's good reason for this. + * + * To use safely closures asynchronously, they must always have a refcount while + * they are running owned by the thread that is running them. Otherwise, suppose + * you submit some bios and wish to have a function run when they all complete: + * + * foo_endio(struct bio *bio) + * { + * closure_put(cl); + * } + * + * closure_init(cl); + * + * do_stuff(); + * closure_get(cl); + * bio1->bi_endio = foo_endio; + * bio_submit(bio1); + * + * do_more_stuff(); + * closure_get(cl); + * bio2->bi_endio = foo_endio; + * bio_submit(bio2); + * + * continue_at(cl, complete_some_read, system_wq); + * + * If closure's refcount started at 0, complete_some_read() could run before the + * second bio was submitted - which is almost always not what you want! More + * importantly, it wouldn't be possible to say whether the original thread or + * complete_some_read()'s thread owned the closure - and whatever state it was + * associated with! + * + * So, closure_init() initializes a closure's refcount to 1 - and when a + * closure_fn is run, the refcount will be reset to 1 first. + * + * Then, the rule is - if you got the refcount with closure_get(), release it + * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount + * on a closure because you called closure_init() or you were run out of a + * closure - _always_ use continue_at(). Doing so consistently will help + * eliminate an entire class of particularly pernicious races. + * + * Lastly, you might have a wait list dedicated to a specific event, and have no + * need for specifying the condition - you just want to wait until someone runs + * closure_wake_up() on the appropriate wait list. In that case, just use + * closure_wait(). It will return either true or false, depending on whether the + * closure was already on a wait list or not - a closure can only be on one wait + * list at a time. + * + * Parents: + * + * closure_init() takes two arguments - it takes the closure to initialize, and + * a (possibly null) parent. + * + * If parent is non null, the new closure will have a refcount for its lifetime; + * a closure is considered to be "finished" when its refcount hits 0 and the + * function to run is null. Hence + * + * continue_at(cl, NULL, NULL); + * + * returns up the (spaghetti) stack of closures, precisely like normal return + * returns up the C stack. continue_at() with non null fn is better thought of + * as doing a tail call. + * + * All this implies that a closure should typically be embedded in a particular + * struct (which its refcount will normally control the lifetime of), and that + * struct can very much be thought of as a stack frame. + */ + +struct closure; +struct closure_syncer; +typedef void (closure_fn) (struct work_struct *); +extern struct dentry *bcache_debug; + +struct closure_waitlist { + struct llist_head list; +}; + +enum closure_state { + /* + * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by + * the thread that owns the closure, and cleared by the thread that's + * waking up the closure. + * + * The rest are for debugging and don't affect behaviour: + * + * CLOSURE_RUNNING: Set when a closure is running (i.e. by + * closure_init() and when closure_put() runs then next function), and + * must be cleared before remaining hits 0. Primarily to help guard + * against incorrect usage and accidentally transferring references. + * continue_at() and closure_return() clear it for you, if you're doing + * something unusual you can use closure_set_dead() which also helps + * annotate where references are being transferred. + */ + + CLOSURE_BITS_START = (1U << 26), + CLOSURE_DESTRUCTOR = (1U << 26), + CLOSURE_WAITING = (1U << 28), + CLOSURE_RUNNING = (1U << 30), +}; + +#define CLOSURE_GUARD_MASK \ + ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) + +#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) +#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) + +struct closure { + union { + struct { + struct workqueue_struct *wq; + struct closure_syncer *s; + struct llist_node list; + closure_fn *fn; + }; + struct work_struct work; + }; + + struct closure *parent; + + atomic_t remaining; + bool closure_get_happened; + +#ifdef CONFIG_DEBUG_CLOSURES +#define CLOSURE_MAGIC_DEAD 0xc054dead +#define CLOSURE_MAGIC_ALIVE 0xc054a11e + + unsigned int magic; + struct list_head all; + unsigned long ip; + unsigned long waiting_on; +#endif +}; + +void closure_sub(struct closure *cl, int v); +void closure_put(struct closure *cl); +void __closure_wake_up(struct closure_waitlist *list); +bool closure_wait(struct closure_waitlist *list, struct closure *cl); +void __closure_sync(struct closure *cl); + +static inline unsigned closure_nr_remaining(struct closure *cl) +{ + return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK; +} + +/** + * closure_sync - sleep until a closure a closure has nothing left to wait on + * + * Sleeps until the refcount hits 1 - the thread that's running the closure owns + * the last refcount. + */ +static inline void closure_sync(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened); +#endif + + if (cl->closure_get_happened) + __closure_sync(cl); +} + +#ifdef CONFIG_DEBUG_CLOSURES + +void closure_debug_create(struct closure *cl); +void closure_debug_destroy(struct closure *cl); + +#else + +static inline void closure_debug_create(struct closure *cl) {} +static inline void closure_debug_destroy(struct closure *cl) {} + +#endif + +static inline void closure_set_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _THIS_IP_; +#endif +} + +static inline void closure_set_ret_ip(struct closure *cl) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->ip = _RET_IP_; +#endif +} + +static inline void closure_set_waiting(struct closure *cl, unsigned long f) +{ +#ifdef CONFIG_DEBUG_CLOSURES + cl->waiting_on = f; +#endif +} + +static inline void closure_set_stopped(struct closure *cl) +{ + atomic_sub(CLOSURE_RUNNING, &cl->remaining); +} + +static inline void set_closure_fn(struct closure *cl, closure_fn *fn, + struct workqueue_struct *wq) +{ + closure_set_ip(cl); + cl->fn = fn; + cl->wq = wq; +} + +static inline void closure_queue(struct closure *cl) +{ + struct workqueue_struct *wq = cl->wq; + /** + * Changes made to closure, work_struct, or a couple of other structs + * may cause work.func not pointing to the right location. + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); + + if (wq) { + INIT_WORK(&cl->work, cl->work.func); + BUG_ON(!queue_work(wq, &cl->work)); + } else + cl->fn(&cl->work); +} + +/** + * closure_get - increment a closure's refcount + */ +static inline void closure_get(struct closure *cl) +{ + cl->closure_get_happened = true; + +#ifdef CONFIG_DEBUG_CLOSURES + BUG_ON((atomic_inc_return(&cl->remaining) & + CLOSURE_REMAINING_MASK) <= 1); +#else + atomic_inc(&cl->remaining); +#endif +} + +/** + * closure_init - Initialize a closure, setting the refcount to 1 + * @cl: closure to initialize + * @parent: parent of the new closure. cl will take a refcount on it for its + * lifetime; may be NULL. + */ +static inline void closure_init(struct closure *cl, struct closure *parent) +{ + cl->fn = NULL; + cl->parent = parent; + if (parent) + closure_get(parent); + + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); + cl->closure_get_happened = false; + + closure_debug_create(cl); + closure_set_ip(cl); +} + +static inline void closure_init_stack(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + +/** + * closure_wake_up - wake up all closures on a wait list, + * with memory barrier + */ +static inline void closure_wake_up(struct closure_waitlist *list) +{ + /* Memory barrier for the wait list */ + smp_mb(); + __closure_wake_up(list); +} + +#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws) +#define closure_type(name, type, member) \ + struct closure *cl = container_of(ws, struct closure, work); \ + type *name = container_of(cl, type, member) + +/** + * continue_at - jump to another function with barrier + * + * After @cl is no longer waiting on anything (i.e. all outstanding refs have + * been dropped with closure_put()), it will resume execution at @fn running out + * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly). + * + * This is because after calling continue_at() you no longer have a ref on @cl, + * and whatever @cl owns may be freed out from under you - a running closure fn + * has a ref on its own closure which continue_at() drops. + * + * Note you are expected to immediately return after using this macro. + */ +#define continue_at(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_sub(_cl, CLOSURE_RUNNING + 1); \ +} while (0) + +/** + * closure_return - finish execution of a closure + * + * This is used to indicate that @cl is finished: when all outstanding refs on + * @cl have been dropped @cl's ref on its parent closure (as passed to + * closure_init()) will be dropped, if one was specified - thus this can be + * thought of as returning to the parent closure. + */ +#define closure_return(_cl) continue_at((_cl), NULL, NULL) + +/** + * continue_at_nobarrier - jump to another function without barrier + * + * Causes @fn to be executed out of @cl, in @wq context (or called directly if + * @wq is NULL). + * + * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn, + * thus it's not safe to touch anything protected by @cl after a + * continue_at_nobarrier(). + */ +#define continue_at_nobarrier(_cl, _fn, _wq) \ +do { \ + set_closure_fn(_cl, _fn, _wq); \ + closure_queue(_cl); \ +} while (0) + +/** + * closure_return_with_destructor - finish execution of a closure, + * with destructor + * + * Works like closure_return(), except @destructor will be called when all + * outstanding refs on @cl have been dropped; @destructor may be used to safely + * free the memory occupied by @cl, and it is called with the ref on the parent + * closure still held - so @destructor could safely return an item to a + * freelist protected by @cl's parent. + */ +#define closure_return_with_destructor(_cl, _destructor) \ +do { \ + set_closure_fn(_cl, _destructor, NULL); \ + closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ +} while (0) + +/** + * closure_call - execute @fn out of a new, uninitialized closure + * + * Typically used when running out of one closure, and we want to run @fn + * asynchronously out of a new closure - @parent will then wait for @cl to + * finish. + */ +static inline void closure_call(struct closure *cl, closure_fn fn, + struct workqueue_struct *wq, + struct closure *parent) +{ + closure_init(cl, parent); + continue_at_nobarrier(cl, fn, wq); +} + +#define __closure_wait_event(waitlist, _cond) \ +do { \ + struct closure cl; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) \ + break; \ + closure_sync(&cl); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ +} while (0) + +#define closure_wait_event(waitlist, _cond) \ +do { \ + if (!(_cond)) \ + __closure_wait_event(waitlist, _cond); \ +} while (0) + +#endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/cma.h b/include/linux/cma.h index 63873b93deaa..9db877506ea8 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -6,12 +6,8 @@ #include <linux/types.h> #include <linux/numa.h> -/* - * There is always at least global CMA area and a few optional - * areas configured in kernel .config. - */ #ifdef CONFIG_CMA_AREAS -#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) +#define MAX_CMA_AREAS CONFIG_CMA_AREAS #endif #define CMA_MAX_NAME 64 diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index d8264417e53c..d527f04400df 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -12,6 +12,8 @@ #define _COMEDI_8254_H #include <linux/types.h> +#include <linux/errno.h> +#include <linux/err.h> struct comedi_device; struct comedi_insn; @@ -57,10 +59,24 @@ struct comedi_subdevice; /* counter maps zero to 0x10000 */ #define I8254_MAX_COUNT 0x10000 +struct comedi_8254; + +/** + * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access + * @i8254: pointer to struct comedi_8254 + * @dir: direction (0 = read, 1 = write) + * @reg: register number + * @val: value to write + * + * Return: Register value when reading, 0 when writing. + */ +typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val); + /** * struct comedi_8254 - private data used by this module - * @iobase: PIO base address of the registers (in/out) - * @mmio: MMIO base address of the registers (read/write) + * @iocb: I/O call-back function for register access + * @context: context for register access (e.g. a base address) * @iosize: I/O size used to access the registers (b/w/l) * @regshift: register gap shift * @osc_base: cascaded oscillator speed in ns @@ -76,8 +92,8 @@ struct comedi_subdevice; * @insn_config: driver specific (*insn_config) callback */ struct comedi_8254 { - unsigned long iobase; - void __iomem *mmio; + comedi_8254_iocb_fn *iocb; + unsigned long context; unsigned int iosize; unsigned int regshift; unsigned int osc_base; @@ -122,13 +138,24 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); +#ifdef CONFIG_HAS_IOPORT +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +#else +static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) +{ + return ERR_PTR(-ENXIO); +} +#endif + +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); #endif /* _COMEDI_8254_H */ diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b2a5bc6b3a49..d24a69da389b 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -10,6 +10,8 @@ #ifndef _COMEDI_8255_H #define _COMEDI_8255_H +#include <linux/errno.h> + #define I8255_SIZE 0x04 #define I8255_DATA_A_REG 0x00 @@ -27,16 +29,26 @@ struct comedi_device; struct comedi_subdevice; -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase); +#ifdef CONFIG_HAS_IOPORT +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase); +#else +static inline int subdev_8255_io_init(struct comedi_device *dev, + struct comedi_subdevice *s, + unsigned long regbase) +{ + return -ENXIO; +} +#endif int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), unsigned long regbase); +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long context), + unsigned long context); + unsigned long subdev_8255_regbase(struct comedi_subdevice *s); #endif diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index 0a1150900ef3..c08416a7364b 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown; */ struct comedi_lrange { int length; - struct comedi_krange range[]; + struct comedi_krange range[] __counted_by(length); }; /** diff --git a/include/linux/compat.h b/include/linux/compat.h index 1cfa4f0f490a..233f61ec8afc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..49feac0162a5 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -9,16 +9,11 @@ * Clang prior to 17 is being silly and considers many __cleanup() variables * as unused (because they are, their sole purpose is to go out of scope). * - * https://reviews.llvm.org/D152180 + * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61 */ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 @@ -119,11 +114,7 @@ #define __diag_str(s) __diag_str1(s) #define __diag(s) _Pragma(__diag_str(clang diagnostic s)) -#if CONFIG_CLANG_VERSION >= 110000 -#define __diag_clang_11(s) __diag(s) -#else -#define __diag_clang_11(s) -#endif +#define __diag_clang_13(s) __diag(s) #define __diag_ignore_all(option, comment) \ - __diag_clang(11, ignore, option) + __diag_clang(13, ignore, option) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..aff92b1d284f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,12 +35,10 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif @@ -66,6 +64,26 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + * + * Work around it via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) +#endif + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ @@ -138,7 +156,7 @@ #endif #define __diag_ignore_all(option, comment) \ - __diag_GCC(8, ignore, option) + __diag(__diag_GCC_ignore option) /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..8c252e073bd8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n +#define __annotate_reachable(c) ({ \ + asm volatile(__stringify_label(c) ":\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long " __stringify_label(c) "b - .\n\t" \ + ".popsection\n\t"); \ +}) +#define annotate_reachable() __annotate_reachable(__COUNTER__) + #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table __section(".rodata..c_jump_table") #else /* !CONFIG_OBJTOOL */ +#define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ @@ -177,10 +186,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. */ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races @@ -212,7 +218,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) @@ -231,6 +237,53 @@ static inline void *offset_to_ptr(const int *off) #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) /* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + * + * Details: + * - sizeof() return an integer constant expression, and does not evaluate + * the value of its operand; it only examines the type of its operand. + * - The results of comparing two integer constant expressions is also + * an integer constant expression. + * - The first literal "8" isn't important. It could be any literal value. + * - The second literal "8" is to avoid warnings about unaligned pointers; + * this could otherwise just be "1". + * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit + * architectures. + * - The C Standard defines "null pointer constant", "(void *)0", as + * distinct from other void pointers. + * - If (x) is an integer constant expression, then the "* 0l" resolves + * it into an integer constant expression of value 0. Since it is cast to + * "void *", this makes the second operand a null pointer constant. + * - If (x) is not an integer constant expression, then the second operand + * resolves to a void pointer (but not a null pointer constant: the value + * is not an integer constant 0). + * - The conditional operator's third operand, "(int *)8", is an object + * pointer (to type "int"). + * - The behavior (including the return type) of the conditional operator + * ("operand1 ? operand2 : operand3") depends on the kind of expressions + * given for the second and third operands. This is the central mechanism + * of the macro: + * - When one operand is a null pointer constant (i.e. when x is an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns the type of the + * object pointer operand (i.e. "int *"). Here, within the sizeof(), we + * would then get: + * sizeof(*((int *)(...)) == sizeof(int) == 4 + * - When one operand is a void pointer (i.e. when x is not an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns a "void *" type. + * Here, within the sizeof(), we would then get: + * sizeof(*((void *)(...)) == sizeof(void) == 1 + * - The equality comparison to "sizeof(int)" therefore depends on (x): + * sizeof(int) == sizeof(int) (x) was a constant expression + * sizeof(int) != sizeof(void) (x) was not a constant expression + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +/* * Whether 'type' is a signed type or an unsigned type. Supports scalar types, * bool and also pointer types. */ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 00efa35c350f..8bdf6e0918c1 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,6 +95,19 @@ #endif /* + * Optional: only supported since gcc >= 15 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://github.com/llvm/llvm-project/pull/76348 + */ +#if __has_attribute(__counted_by__) +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + +/* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 * @@ -130,19 +143,6 @@ #endif /* - * Optional: only supported since gcc >= 14 - * Optional: only supported since clang >= 17 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 - */ -#if __has_attribute(__element_count__) -# define __counted_by(member) __attribute__((__element_count__(#member))) -#else -# define __counted_by(member) -#endif - -/* * Optional: only supported since clang >= 14.0 * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute @@ -334,6 +334,18 @@ #define __section(section) __attribute__((__section__(section))) /* + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized + */ +#if __has_attribute(__uninitialized__) +# define __uninitialized __attribute__((__uninitialized__)) +#else +# define __uninitialized +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806e..8f8236317d5b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -2,6 +2,15 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #ifndef __ASSEMBLY__ /* @@ -90,31 +99,48 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute * * When -falign-functions=N is in use, we must avoid the cold attribute as - * contemporary versions of GCC drop the alignment for cold functions. Worse, - * GCC can implicitly mark callees of cold functions as cold themselves, so - * it's not sufficient to add __function_aligned here as that will not ensure - * that callees are correctly aligned. + * GCC drops the alignment for cold functions. Worse, GCC can implicitly mark + * callees of cold functions as cold themselves, so it's not sufficient to add + * __function_aligned here as that will not ensure that callees are correctly + * aligned. * * See: * * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9 */ -#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0) +#if defined(CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT) || (CONFIG_FUNCTION_ALIGNMENT == 0) #define __cold __attribute__((__cold__)) #else #define __cold #endif -/* Builtins */ - /* - * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. - * In the meantime, to support gcc < 10, we implement __has_builtin - * by hand. + * On x86-64 and arm64 targets, __preserve_most changes the calling convention + * of a function to make the code in the caller as unintrusive as possible. This + * convention behaves identically to the C calling convention on how arguments + * and return values are passed, but uses a different set of caller- and callee- + * saved registers. + * + * The purpose is to alleviates the burden of saving and recovering a large + * register set before and after the call in the caller. This is beneficial for + * rarely taken slow paths, such as error-reporting functions that may be called + * from hot paths. + * + * Note: This may conflict with instrumentation inserted on function entry which + * does not use __preserve_most or equivalent convention (if in assembly). Since + * function tracing assumes the normal C calling convention, where the attribute + * is supported, __preserve_most implies notrace. It is recommended to restrict + * use of the attribute to functions that should or already disable tracing. + * + * Optional: not supported by gcc. + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most */ -#ifndef __has_builtin -#define __has_builtin(x) (0) +#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64)) +# define __preserve_most notrace __attribute__((__preserve_most__)) +#else +# define __preserve_most #endif /* Compiler specific macros. */ @@ -252,15 +278,33 @@ struct ftrace_likely_data { # define __no_kcsan #endif +#ifdef __SANITIZE_MEMORY__ +/* + * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined + * functions, therefore disabling KMSAN checks also requires disabling inlining. + * + * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors + * within the function and marks all its outputs as initialized. + */ +# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused +#endif + #ifndef __no_sanitize_or_inline #define __no_sanitize_or_inline __always_inline #endif +/* Do not trap wrapping arithmetic within an annotated function. */ +#ifdef CONFIG_UBSAN_SIGNED_WRAP +# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) +#else +# define __signed_wrap +#endif + /* Section for code which can't be instrumented at all */ #define __noinstr_section(section) \ noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ - __no_sanitize_memory + __no_sanitize_memory __signed_wrap #define noinstr __noinstr_section(".noinstr.text") @@ -324,8 +368,27 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +/* + * When the size of an allocated object is needed, use the best available + * mechanism to find it. (For cases where sizeof() cannot be used.) + */ +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#endif + +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ +#ifndef asm_goto_output +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/include/linux/completion.h b/include/linux/completion.h index 62b32b19e0a8..fb2915676574 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x); extern bool completion_done(struct completion *x); extern void complete(struct completion *); +extern void complete_on_current_cpu(struct completion *x); extern void complete_all(struct completion *); #endif diff --git a/include/linux/connector.h b/include/linux/connector.h index 487350bb19c3..70bc1160f3d8 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -90,13 +90,18 @@ void cn_del_callback(const struct cb_id *id); * If @group is not zero, then message will be delivered * to the specified group. * @gfp_mask: GFP mask. + * @filter: Filter function to be used at netlink layer. + * @filter_data:Filter data to be supplied to the filter function * * It can be safely called from softirq context, but may silently * fail under strong memory pressure. * * If there are no listeners for given group %-ESRCH can be returned. */ -int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask); +int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, + u32 group, gfp_t gfp_mask, + netlink_filter_fn filter, + void *filter_data); /** * cn_netlink_send - Sends message to the specified groups. diff --git a/include/linux/console.h b/include/linux/console.h index d3195664baa5..31a8f5b85f5d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -18,6 +18,7 @@ #include <linux/bits.h> #include <linux/rculist.h> #include <linux/types.h> +#include <linux/vesa.h> struct vc_data; struct console_font_op; @@ -36,63 +37,91 @@ enum vc_intensity; /** * struct consw - callbacks for consoles * + * @owner: the module to get references of when this console is used + * @con_startup: set up the console and return its name (like VGA, EGA, ...) + * @con_init: initialize the console on @vc. @init is true for the very first + * call on this @vc. + * @con_deinit: deinitialize the console from @vc. + * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1. + * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc. + * (optional -- @con_putcs would be called instead) + * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc. + * @con_cursor: enable/disable cursor depending on @enable * @con_scroll: move lines from @top to @bottom in direction @dir by @lines. * Return true if no generic handling should be done. * Invoked by csi_M and printing to the console. - * @con_set_palette: sets the palette of the console to @table (optional) + * @con_switch: notifier about the console switch; it is supposed to return + * true if a redraw is needed. + * @con_blank: blank/unblank the console. The target mode is passed in @blank. + * @mode_switch is set if changing from/to text/graphics. The hook + * is supposed to return true if a redraw is needed. + * @con_font_set: set console @vc font to @font with height @vpitch. @flags can + * be %KD_FONT_FLAG_DONT_RECALC. (optional) + * @con_font_get: fetch the current font on @vc of height @vpitch into @font. + * (optional) + * @con_font_default: set default font on @vc. @name can be %NULL or font name + * to search for. @font can be filled back. (optional) + * @con_resize: resize the @vc console to @width x @height. @from_user is true + * when this change comes from the user space. + * @con_set_palette: sets the palette of the console @vc to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. (optional) + * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not + * provided or returns false, the origin is set to + * @vc->vc_screenbuf. (optional) + * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g. + * upon entering graphics. (optional) + * @con_build_attr: build attributes based on @color, @intensity and other + * parameters. The result is used for both normal and erase + * characters. (optional) + * @con_invert_region: invert a region of length @count on @vc starting at @p. + * (optional) + * @con_debug_enter: prepare the console for the debugger. This includes, but + * is not limited to, unblanking the console, loading an + * appropriate palette, and allowing debugger generated output. + * (optional) + * @con_debug_leave: restore the console to its pre-debug state as closely as + * possible. (optional) */ struct consw { struct module *owner; const char *(*con_startup)(void); - void (*con_init)(struct vc_data *vc, int init); + void (*con_init)(struct vc_data *vc, bool init); void (*con_deinit)(struct vc_data *vc); - void (*con_clear)(struct vc_data *vc, int sy, int sx, int height, - int width); - void (*con_putc)(struct vc_data *vc, int c, int ypos, int xpos); - void (*con_putcs)(struct vc_data *vc, const unsigned short *s, - int count, int ypos, int xpos); - void (*con_cursor)(struct vc_data *vc, int mode); + void (*con_clear)(struct vc_data *vc, unsigned int y, + unsigned int x, unsigned int count); + void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y, + unsigned int x); + void (*con_putcs)(struct vc_data *vc, const u16 *s, + unsigned int count, unsigned int ypos, + unsigned int xpos); + void (*con_cursor)(struct vc_data *vc, bool enable); bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); - int (*con_switch)(struct vc_data *vc); - int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); - int (*con_font_set)(struct vc_data *vc, struct console_font *font, - unsigned int vpitch, unsigned int flags); + bool (*con_switch)(struct vc_data *vc); + bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank, + bool mode_switch); + int (*con_font_set)(struct vc_data *vc, + const struct console_font *font, + unsigned int vpitch, unsigned int flags); int (*con_font_get)(struct vc_data *vc, struct console_font *font, unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, - struct console_font *font, char *name); + struct console_font *font, const char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, - unsigned int height, unsigned int user); + unsigned int height, bool from_user); void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); void (*con_scrolldelta)(struct vc_data *vc, int lines); - int (*con_set_origin)(struct vc_data *vc); + bool (*con_set_origin)(struct vc_data *vc); void (*con_save_screen)(struct vc_data *vc); u8 (*con_build_attr)(struct vc_data *vc, u8 color, enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); - u16 *(*con_screen_pos)(const struct vc_data *vc, int offset); - unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position, - int *px, int *py); - /* - * Flush the video console driver's scrollback buffer - */ - void (*con_flush_scrollback)(struct vc_data *vc); - /* - * Prepare the console for the debugger. This includes, but is not - * limited to, unblanking the console, loading an appropriate - * palette, and allowing debugger generated output. - */ - int (*con_debug_enter)(struct vc_data *vc); - /* - * Restore the console to its pre-debug state as closely as possible. - */ - int (*con_debug_leave)(struct vc_data *vc); + void (*con_debug_enter)(struct vc_data *vc); + void (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; @@ -101,36 +130,32 @@ extern const struct consw dummy_con; /* dummy console buffer */ extern const struct consw vga_con; /* VGA text console */ extern const struct consw newport_con; /* SGI Newport console */ +struct screen_info; +#ifdef CONFIG_VGA_CONSOLE +void vgacon_register_screen(struct screen_info *si); +#else +static inline void vgacon_register_screen(struct screen_info *si) { } +#endif + int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); void give_up_console(const struct consw *sw); -#ifdef CONFIG_HW_CONSOLE -int con_debug_enter(struct vc_data *vc); -int con_debug_leave(void); +#ifdef CONFIG_VT +void con_debug_enter(struct vc_data *vc); +void con_debug_leave(void); #else -static inline int con_debug_enter(struct vc_data *vc) -{ - return 0; -} -static inline int con_debug_leave(void) -{ - return 0; -} +static inline void con_debug_enter(struct vc_data *vc) { } +static inline void con_debug_leave(void) { } #endif -/* cursor */ -#define CM_DRAW (1) -#define CM_ERASE (2) -#define CM_MOVE (3) - /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) */ /** - * cons_flags - General console flags + * enum cons_flags - General console flags * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate * output of messages that were already shown by boot * consoles or read by userspace via syslog() syscall. @@ -154,6 +179,10 @@ static inline int con_debug_leave(void) * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. + * @CON_SUSPENDED: Indicates if a console is suspended. If true, the + * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -163,6 +192,112 @@ enum cons_flags { CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), + CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), +}; + +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + +/** + * enum nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) + * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; +struct printk_buffers; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + * @backlog: Ringbuffer has pending records + * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; + + /* members set by emit */ + unsigned int backlog : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; + u64 seq; +}; + +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; }; /** @@ -184,6 +319,11 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @write_atomic: Write callback for atomic context + * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -203,6 +343,13 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); + atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP @@ -321,7 +468,7 @@ static inline bool console_is_registered(const struct console *con) * for_each_console() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * - * The console list and the console->flags are immutable while iterating. + * The console list and the &console.flags are immutable while iterating. * * Requires console_list_lock to be held. */ @@ -329,6 +476,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; @@ -337,7 +494,7 @@ enum con_flush_mode { CONSOLE_REPLAY_ALL, }; -extern int add_preferred_console(char *name, int idx, char *options); +extern int add_preferred_console(const char *name, const short idx, char *options); extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); @@ -381,12 +538,6 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; -/* VESA Blanking Levels */ -#define VESA_NO_BLANKING 0 -#define VESA_VSYNC_SUSPEND 1 -#define VESA_HSYNC_SUSPEND 2 -#define VESA_POWERDOWN 3 - extern void console_init(void); /* For deferred console takeover */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 539f1cd45309..20f564e98552 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -151,7 +151,6 @@ struct vc_data { DECLARE_BITMAP(vc_tab_stop, VC_TABSTOPS_COUNT); /* Tab stops. 256 columns. */ unsigned char vc_palette[16*3]; /* Colour palette for VGA+ */ unsigned short * vc_translate; - unsigned int vc_resize_user; /* resize request from user */ unsigned int vc_bell_pitch; /* Console bell pitch */ unsigned int vc_bell_duration; /* Console bell duration */ unsigned short vc_cur_blink_ms; /* Cursor blink duration */ diff --git a/include/linux/const.h b/include/linux/const.h index 435ddd72d2c4..81b8aae5a855 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,12 +3,4 @@ #include <vdso/const.h> -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #endif /* _LINUX_CONST_H */ diff --git a/include/linux/container.h b/include/linux/container.h index 2566a1baa736..dd00cc918a92 100644 --- a/include/linux/container.h +++ b/include/linux/container.h @@ -12,7 +12,7 @@ #include <linux/device.h> /* drivers/base/power/container.c */ -extern struct bus_type container_subsys; +extern const struct bus_type container_subsys; struct container_dev { struct device dev; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bf70987240e4..5f288d475490 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,6 +6,8 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H +#include <linux/amba/bus.h> +#include <linux/clk.h> #include <linux/device.h> #include <linux/io.h> #include <linux/perf_event.h> @@ -33,7 +35,7 @@ #define CORESIGHT_UNLOCK 0xc5acce55 -extern struct bus_type coresight_bustype; +extern const struct bus_type coresight_bustype; enum coresight_dev_type { CORESIGHT_DEV_TYPE_SINK, @@ -62,6 +64,7 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, + CORESIGHT_DEV_SUBTYPE_SOURCE_TPDM, CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS, }; @@ -223,13 +226,26 @@ struct coresight_sysfs_link { * by @coresight_ops. * @access: Device i/o access abstraction for this device. * @dev: The device entity associated to this component. - * @refcnt: keep track of what is in use. + * @mode: This tracer's mode, i.e sysFS, Perf or disabled. This is + * actually an 'enum cs_mode', but is stored in an atomic type. + * This is always accessed through local_read() and local_set(), + * but wherever it's done from within the Coresight device's lock, + * a non-atomic read would also work. This is the main point of + * synchronisation between code happening inside the sysfs mode's + * coresight_mutex and outside when running in Perf mode. A compare + * and exchange swap is done to atomically claim one mode or the + * other. + * @refcnt: keep track of what is in use. Only access this outside of the + * device's spinlock when the coresight_mutex held and mode == + * CS_MODE_SYSFS. Otherwise it must be accessed from inside the + * spinlock. * @orphan: true if the component has connections that haven't been linked. - * @enable: 'true' if component is currently part of an active path. - * @activated: 'true' only if a _sink_ has been activated. A sink can be - * activated but not yet enabled. Enabling for a _sink_ - * happens when a source has been selected and a path is enabled - * from source to that sink. + * @sysfs_sink_activated: 'true' when a sink has been selected for use via sysfs + * by writing a 1 to the 'enable_sink' file. A sink can be + * activated but not yet enabled. Enabling for a _sink_ happens + * when a source has been selected and a path is enabled from + * source to that sink. A sink can also become enabled but not + * activated if it's used via Perf. * @ea: Device attribute for sink representation under PMU directory. * @def_sink: cached reference to default sink found for this device. * @nr_links: number of sysfs links created to other components from this @@ -247,11 +263,11 @@ struct coresight_device { const struct coresight_ops *ops; struct csdev_access access; struct device dev; - atomic_t refcnt; + local_t mode; + int refcnt; bool orphan; - bool enable; /* true only if configured as part of a path */ /* sink specific fields */ - bool activated; /* true only if a sink is part of a path */ + bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; /* sysfs links between components */ @@ -375,8 +391,6 @@ struct coresight_ops { const struct coresight_ops_helper *helper_ops; }; -#if IS_ENABLED(CONFIG_CORESIGHT) - static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, u32 offset) { @@ -386,6 +400,63 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, return csa->read(offset, true, false); } +#define CORESIGHT_CIDRn(i) (0xFF0 + ((i) * 4)) + +static inline u32 coresight_get_cid(void __iomem *base) +{ + u32 i, cid = 0; + + for (i = 0; i < 4; i++) + cid |= readl(base + CORESIGHT_CIDRn(i)) << (i * 8); + + return cid; +} + +static inline bool is_coresight_device(void __iomem *base) +{ + u32 cid = coresight_get_cid(base); + + return cid == CORESIGHT_CID; +} + +/* + * Attempt to find and enable "APB clock" for the given device + * + * Returns: + * + * clk - Clock is found and enabled + * NULL - clock is not found + * ERROR - Clock is found but failed to enable + */ +static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) +{ + struct clk *pclk; + int ret; + + pclk = clk_get(dev, "apb_pclk"); + if (IS_ERR(pclk)) + return NULL; + + ret = clk_prepare_enable(pclk); + if (ret) { + clk_put(pclk); + return ERR_PTR(ret); + } + return pclk; +} + +#define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) + +static inline u32 coresight_get_pid(struct csdev_access *csa) +{ + u32 i, pid = 0; + + for (i = 0; i < 4; i++) + pid |= csdev_access_relaxed_read32(csa, CORESIGHT_PIDRn(i)) << (i * 8); + + return pid; +} + static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa, u32 lo_offset, u32 hi_offset) { @@ -509,11 +580,43 @@ static inline bool coresight_is_percpu_sink(struct coresight_device *csdev) (csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM); } +/* + * Atomically try to take the device and set a new mode. Returns true on + * success, false if the device is already taken by someone else. + */ +static inline bool coresight_take_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + return local_cmpxchg(&csdev->mode, CS_MODE_DISABLED, new_mode) == + CS_MODE_DISABLED; +} + +static inline enum cs_mode coresight_get_mode(struct coresight_device *csdev) +{ + return local_read(&csdev->mode); +} + +static inline void coresight_set_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + enum cs_mode current_mode = coresight_get_mode(csdev); + + /* + * Changing to a new mode must be done from an already disabled state + * unless it's synchronized with coresight_take_mode(). Otherwise the + * device is already in use and signifies a locking issue. + */ + WARN(new_mode != CS_MODE_DISABLED && current_mode != CS_MODE_DISABLED && + current_mode != new_mode, "Device already in use\n"); + + local_set(&csdev->mode, new_mode); +} + extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); -extern int coresight_enable(struct coresight_device *csdev); -extern void coresight_disable(struct coresight_device *csdev); +extern int coresight_enable_sysfs(struct coresight_device *csdev); +extern void coresight_disable_sysfs(struct coresight_device *csdev); extern int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); @@ -538,83 +641,6 @@ void coresight_relaxed_write64(struct coresight_device *csdev, u64 val, u32 offset); void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); -#else -static inline struct coresight_device * -coresight_register(struct coresight_desc *desc) { return NULL; } -static inline void coresight_unregister(struct coresight_device *csdev) {} -static inline int -coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } -static inline void coresight_disable(struct coresight_device *csdev) {} - -static inline int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value) -{ - return 1; -} - -static inline int coresight_claim_device_unlocked(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline int coresight_claim_device(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline void coresight_disclaim_device(struct coresight_device *csdev) {} -static inline void coresight_disclaim_device_unlocked(struct coresight_device *csdev) {} - -static inline bool coresight_loses_context_with_cpu(struct device *dev) -{ - return false; -} - -static inline u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u32 coresight_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset) -{ -} - -static inline void coresight_relaxed_write32(struct coresight_device *csdev, - u32 val, u32 offset) -{ -} - -static inline u64 coresight_relaxed_read64(struct coresight_device *csdev, - u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u64 coresight_read64(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_relaxed_write64(struct coresight_device *csdev, - u64 val, u32 offset) -{ -} - -static inline void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset) -{ -} - -#endif /* IS_ENABLED(CONFIG_CORESIGHT) */ - extern int coresight_get_cpu(struct device *dev); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); diff --git a/include/linux/counter.h b/include/linux/counter.h index b63746637de2..702e9108bbb4 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -399,7 +399,7 @@ struct counter_device { struct mutex ops_exist_lock; }; -void *counter_priv(const struct counter_device *const counter); +void *counter_priv(const struct counter_device *const counter) __attribute_const__; struct counter_device *counter_alloc(size_t sizeof_priv); void counter_put(struct counter_device *const counter); diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..265b0f8fc0b3 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e006c719182b..861c3bfc5f17 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/cpuhotplug.h> +#include <linux/cpu_smt.h> struct device; struct device_node; @@ -74,17 +75,26 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern bool arch_cpu_is_hotpluggable(int cpu); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); extern ssize_t arch_cpu_release(const char *, size_t); #endif +#ifdef CONFIG_GENERIC_CPU_DEVICES +DECLARE_PER_CPU(struct cpu, cpu_devices); +#endif + /* * These states are not related to the core CPU hotplug mechanism. They are * used by various (sub)architectures to track internal state @@ -104,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); -void bringup_nonboot_cpus(unsigned int setup_max_cpus); +void bringup_nonboot_cpus(unsigned int max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -120,7 +130,7 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ -extern struct bus_type cpu_subsys; +extern const struct bus_type cpu_subsys; extern int lockdep_is_cpus_held(void); @@ -152,6 +162,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); @@ -186,6 +198,8 @@ void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); +void arch_tick_broadcast_enter(void); +void arch_tick_broadcast_exit(void); void __noreturn arch_cpu_idle_dead(void); #ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT @@ -194,7 +208,6 @@ void arch_cpu_finalize_init(void); static inline void arch_cpu_finalize_init(void) { } #endif -void cpu_set_state_online(int cpu); void play_idle_precise(u64 duration_ns, u64 latency_ns); static inline void play_idle(unsigned long duration_us) @@ -208,31 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -enum cpuhp_smt_control { - CPU_SMT_ENABLED, - CPU_SMT_DISABLED, - CPU_SMT_FORCE_DISABLED, - CPU_SMT_NOT_SUPPORTED, - CPU_SMT_NOT_IMPLEMENTED, -}; - -#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) -extern enum cpuhp_smt_control cpu_smt_control; -extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology(void); -extern bool cpu_smt_possible(void); -extern int cpuhp_smt_enable(void); -extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); -#else -# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) -static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology(void) { } -static inline bool cpu_smt_possible(void) { return false; } -static inline int cpuhp_smt_enable(void) { return 0; } -static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } -#endif - +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h new file mode 100644 index 000000000000..0c1664294b57 --- /dev/null +++ b/include/linux/cpu_smt.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CPU_SMT_H_ +#define _LINUX_CPU_SMT_H_ + +enum cpuhp_smt_control { + CPU_SMT_ENABLED, + CPU_SMT_DISABLED, + CPU_SMT_FORCE_DISABLED, + CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) +extern enum cpuhp_smt_control cpu_smt_control; +extern unsigned int cpu_smt_num_threads; +extern void cpu_smt_disable(bool force); +extern void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads); +extern bool cpu_smt_possible(void); +extern int cpuhp_smt_enable(void); +extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); +#else +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) +# define cpu_smt_num_threads 1 +static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_set_num_threads(unsigned int num_threads, + unsigned int max_threads) { } +static inline bool cpu_smt_possible(void) { return false; } +static inline int cpuhp_smt_enable(void) { return 0; } +static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } +#endif + +#endif /* _LINUX_CPU_SMT_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 172ff51c1b2a..9956afb9acc2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -19,6 +19,7 @@ #include <linux/pm_qos.h> #include <linux/spinlock.h> #include <linux/sysfs.h> +#include <linux/minmax.h> /********************************************************************* * CPUFREQ INTERFACE * @@ -140,6 +141,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; @@ -259,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } #endif #ifdef CONFIG_CPU_FREQ_STAT @@ -370,7 +375,7 @@ struct cpufreq_driver { int (*target_intermediate)(struct cpufreq_policy *policy, unsigned int index); - /* should be defined, if possible */ + /* should be defined, if possible, return 0 on error */ unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. */ @@ -467,17 +472,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli unsigned int min, unsigned int max) { - if (policy->min < min) - policy->min = min; - if (policy->max < min) - policy->max = min; - if (policy->min > max) - policy->min = max; - if (policy->max > max) - policy->max = max; - if (policy->min > policy->max) - policy->min = policy->max; - return; + policy->max = clamp(policy->max, min, max); + policy->min = clamp(policy->min, min, policy->max); } static inline void @@ -573,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, /* * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * ondemand governor will work on any processor with transition latency <= 10ms, - * using appropriate sampling rate. + * polling frequency is 1000 times the transition latency of the processor. */ #define LATENCY_MULTIPLIER (1000) @@ -699,26 +693,6 @@ struct cpufreq_frequency_table { * order */ }; -#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -#else -static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ - return -EINVAL; -} - -static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ -} -#endif - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. @@ -1026,6 +1000,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1059,7 +1045,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } @@ -1154,8 +1141,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ if (ret < 0) continue; - if (pargs->np == args.np && pargs->args_count == args.args_count && - !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) + if (of_phandle_args_equal(pargs, &args)) cpumask_set_cpu(cpu, cpumask); of_node_put(args.np); @@ -1198,14 +1184,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov); -#else -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) { } -#endif - extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale @@ -1216,6 +1194,7 @@ void arch_set_freq_scale(const struct cpumask *cpus, { } #endif + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 25b6e6e6ba6b..35e78ddb2b37 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -48,7 +48,7 @@ * same section. * * If neither #1 nor #2 apply, please use the dynamic state space when - * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE + * setting up a state by using CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN * for the @state argument of the setup function. * * See Documentation/core-api/cpu_hotplug.rst for further information and @@ -66,15 +66,12 @@ enum cpuhp_state { CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, - CPUHP_X86_APB_DEAD, CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, CPUHP_IBMVNIC_DEAD, CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, - /* Must be after CPUHP_MM_VMSTAT_DEAD */ - CPUHP_MM_DEMOTION_DEAD, CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, @@ -90,14 +87,12 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, - CPUHP_XFS_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, - CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, CPUHP_AP_DTPM_CPU_DEAD, @@ -109,7 +104,6 @@ enum cpuhp_state { CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, - CPUHP_SLAB_PREPARE, CPUHP_MD_RAID5_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_CPUIDLE_COUPLED_PREPARE, @@ -119,13 +113,11 @@ enum cpuhp_state { CPUHP_XEN_EVTCHN_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, - CPUHP_NET_FLOW_PREPARE, CPUHP_TOPOLOGY_PREPARE, CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, CPUHP_TRACE_RB_PREPARE, CPUHP_MM_ZS_PREPARE, - CPUHP_MM_ZSWP_MEM_PREPARE, CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, @@ -152,18 +144,14 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_IRQ_RISCV_STARTING, CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_MICROCODE_LOADER, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - CPUHP_AP_PERF_X86_CQM_STARTING, CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, - CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, @@ -173,13 +161,13 @@ enum cpuhp_state { CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, + CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, - CPUHP_AP_MARCO_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, @@ -190,10 +178,13 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_XEN_RUNSTATE_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_TICK_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, @@ -205,7 +196,6 @@ enum cpuhp_state { CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, @@ -216,9 +206,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, - CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, @@ -244,15 +232,14 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_PERF_CSKY_ONLINE, + CPUHP_AP_TMIGR_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, - CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, - /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */ - CPUHP_AP_MM_DEMOTION_ONLINE, + CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, CPUHP_AP_ACTIVE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49db..1c29947db848 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -4,9 +4,10 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. In general, + * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> @@ -97,7 +98,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And @@ -112,7 +113,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: - * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() @@ -155,7 +156,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { @@ -166,7 +167,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if all cpus are set. + * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { @@ -178,7 +179,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * @srcp1: the first input * @srcp2: the second input * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -190,7 +191,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * - * Returns >= nr_cpumask_bits if no CPUs set. + * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { @@ -199,10 +200,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) /** * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set. + * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) @@ -215,10 +216,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /** * cpumask_next_zero - get the next unset cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus unset. + * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { @@ -254,11 +255,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set in both. + * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, @@ -373,7 +374,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) @@ -388,11 +389,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_nth - get the first cpu in a cpumask + * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { @@ -400,12 +401,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s } /** - * cpumask_nth_and - get the first cpu in 2 cpumasks + * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, @@ -416,12 +417,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -436,9 +437,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -497,7 +498,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in @cpumask, else returns false + * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { @@ -509,9 +510,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_set_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { @@ -523,9 +524,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_clear_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { @@ -560,7 +561,7 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, @@ -603,7 +604,7 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, @@ -617,6 +618,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input + * + * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) @@ -630,6 +633,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * @src3p: the third input + * + * Return: true if first cpumask ORed with second cpumask == third cpumask, + * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, @@ -643,6 +649,9 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input + * + * Return: true if first cpumask ANDed with second cpumask is non-empty, + * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) @@ -656,7 +665,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns true if *@src1p is a subset of *@src2p, else returns false + * Return: true if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) @@ -668,6 +677,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p, /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + * + * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { @@ -677,6 +688,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp) /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + * + * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { @@ -686,6 +699,8 @@ static inline bool cpumask_full(const struct cpumask *srcp) /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { @@ -696,6 +711,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -704,6 +721,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, } /** + * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in both *srcp1 and *srcp2 + */ +static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift @@ -744,7 +774,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) @@ -753,7 +783,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @mask1: the first input cpumask * @mask2: the second input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) @@ -769,7 +799,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) @@ -783,7 +813,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) @@ -797,7 +827,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { @@ -809,7 +839,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { @@ -817,7 +847,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) } /** - * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes + * + * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { @@ -831,7 +863,7 @@ static inline unsigned int cpumask_size(void) * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * - * ie. + * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; @@ -887,6 +919,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * a nop returning a constant 1 (in <linux/cpumask.h>). * * See alloc_cpumask_var_node. + * + * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) @@ -957,6 +991,8 @@ static inline bool cpumask_available(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ +DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T)); + /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); @@ -1025,7 +1061,7 @@ set_cpu_dying(unsigned int cpu, bool dying) } /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and @@ -1068,6 +1104,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. + * + * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { @@ -1160,7 +1198,7 @@ static inline bool cpu_dying(unsigned int cpu) * @mask: the cpumask to copy * @buf: the buffer to copy into * - * Returns the length of the (null-terminated) @buf string, zero if + * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t @@ -1183,7 +1221,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied, excluding + * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t @@ -1204,6 +1242,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. + * + * Return: the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d629094fac6e..0ce6ff0d9c9a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -77,6 +77,7 @@ extern void cpuset_lock(void); extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); +extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -120,11 +121,6 @@ static inline int cpuset_do_page_mem_spread(void) return task_spread_page(current); } -static inline int cpuset_do_slab_mem_spread(void) -{ - return task_spread_slab(current); -} - extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -207,6 +203,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) return false; } +static inline bool cpuset_cpu_is_isolated(int cpu) +{ + return false; +} + static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; @@ -258,11 +259,6 @@ static inline int cpuset_do_page_mem_spread(void) return 0; } -static inline int cpuset_do_slab_mem_spread(void) -{ - return 0; -} - static inline bool current_cpuset_is_being_rebound(void) { return false; diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index de62a722431e..d33352c2e386 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -6,82 +6,90 @@ #include <linux/elfcore.h> #include <linux/elf.h> -#define CRASH_CORE_NOTE_NAME "CORE" -#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) -#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +struct kimage; +struct crash_mem { + unsigned int max_nr_ranges; + unsigned int nr_ranges; + struct range ranges[] __counted_by(max_nr_ranges); +}; + +#ifdef CONFIG_CRASH_DUMP + +int crash_shrink_memory(unsigned long new_size); +ssize_t crash_get_memory_size(void); + +#ifndef arch_kexec_protect_crashkres /* - * The per-cpu notes area is a list of notes terminated by a "NULL" - * note header. For kdump, the code in vmcore.c runs in the context - * of the second kernel to combine them into one note. + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. + * + * Provide an empty default implementation here -- architecture + * code may override this */ -#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - CRASH_CORE_NOTE_NAME_BYTES + \ - CRASH_CORE_NOTE_DESC_BYTES) - -#define VMCOREINFO_BYTES PAGE_SIZE -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - VMCOREINFO_NOTE_NAME_BYTES + \ - VMCOREINFO_BYTES) - -typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; - -void crash_update_vmcoreinfo_safecopy(void *ptr); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); - -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_BUILD_ID() \ - ({ \ - static_assert(sizeof(vmlinux_build_id) == 20); \ - vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ - }) - -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ARRAY(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_TYPE_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) - -extern unsigned char *vmcoreinfo_data; -extern size_t vmcoreinfo_size; -extern u32 *vmcoreinfo_note; - -Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len); -void final_note(Elf_Word *buf); - -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); +static inline void arch_kexec_protect_crashkres(void) { } +#endif + +#ifndef arch_kexec_unprotect_crashkres +static inline void arch_kexec_unprotect_crashkres(void) { } +#endif + + + +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } +#endif + +int crash_check_update_elfcorehdr(void); + +#ifndef crash_hotplug_cpu_support +static inline int crash_hotplug_cpu_support(void) { return 0; } +#endif + +#ifndef crash_hotplug_memory_support +static inline int crash_hotplug_memory_support(void) { return 0; } +#endif + +#ifndef crash_get_elfcorehdr_size +static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } +#endif + +/* Alignment required for elf header segment */ +#define ELF_CORE_HEADER_ALIGN 4096 + +extern int crash_exclude_mem_range(struct crash_mem *mem, + unsigned long long mstart, + unsigned long long mend); +extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, + void **addr, unsigned long *sz); + +struct kimage; +struct kexec_segment; + +#define KEXEC_CRASH_HP_NONE 0 +#define KEXEC_CRASH_HP_ADD_CPU 1 +#define KEXEC_CRASH_HP_REMOVE_CPU 2 +#define KEXEC_CRASH_HP_ADD_MEMORY 3 +#define KEXEC_CRASH_HP_REMOVE_MEMORY 4 +#define KEXEC_CRASH_HP_INVALID_CPU -1U + +extern void __crash_kexec(struct pt_regs *regs); +extern void crash_kexec(struct pt_regs *regs); +int kexec_should_crash(struct task_struct *p); +int kexec_crash_loaded(void); +void crash_save_cpu(struct pt_regs *regs, int cpu); +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); + +#else /* !CONFIG_CRASH_DUMP*/ +struct pt_regs; +struct task_struct; +struct kimage; +static inline void __crash_kexec(struct pt_regs *regs) { } +static inline void crash_kexec(struct pt_regs *regs) { } +static inline int kexec_should_crash(struct task_struct *p) { return 0; } +static inline int kexec_crash_loaded(void) { return 0; } +static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {}; +static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }; +#endif /* CONFIG_CRASH_DUMP*/ #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h new file mode 100644 index 000000000000..5a9df944fb80 --- /dev/null +++ b/include/linux/crash_reserve.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_CRASH_RESERVE_H +#define LINUX_CRASH_RESERVE_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include <asm/crash_reserve.h> +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif +#endif /* LINUX_CRASH_RESERVE_H */ diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h index 72c92c396bb8..cd4f420231ba 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -5,19 +5,12 @@ #include <linux/types.h> extern u16 const crc_ccitt_table[256]; -extern u16 const crc_ccitt_false_table[256]; extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len); -extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len); static inline u16 crc_ccitt_byte(u16 crc, const u8 c) { return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } -static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c) -{ - return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; -} - #endif /* _LINUX_CRC_CCITT_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index 9ed9232af934..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/key.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/uidgid.h> #include <linux/sched.h> #include <linux/sched/user.h> @@ -23,7 +24,7 @@ struct inode; * COW Supplementary groups list */ struct group_info { - atomic_t usage; + refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; @@ -39,7 +40,7 @@ struct group_info { */ static inline struct group_info *get_group_info(struct group_info *gi) { - atomic_inc(&gi->usage); + refcount_inc(&gi->usage); return gi; } @@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi) */ #define put_group_info(group_info) \ do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ + if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) @@ -108,14 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif + atomic_long_t usage; kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -164,7 +158,6 @@ extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); -extern int change_create_files_as(struct cred *, struct inode *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -220,6 +173,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) } /** + * get_new_cred_many - Get references on a new set of credentials + * @cred: The new credentials to reference + * @nr: Number of references to acquire + * + * Get references on the specified set of new credentials. The caller must + * release all acquired references. + */ +static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +{ + atomic_long_add(nr, &cred->usage); + return cred; +} + +/** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference * @@ -228,16 +195,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred_many(cred, 1); } /** - * get_cred - Get a reference on a set of credentials + * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference + * @nr: Number of references to acquire * - * Get a reference on the specified set of credentials. The caller must - * release the reference. If %NULL is passed, it is returned with no action. + * Get references on the specified set of credentials. The caller must release + * all acquired reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the @@ -245,14 +212,27 @@ static inline struct cred *get_new_cred(struct cred *cred) * accidental alteration of a set of credentials that should be considered * immutable. */ -static inline const struct cred *get_cred(const struct cred *cred) +static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; - return get_new_cred(nonconst_cred); + return get_new_cred_many(nonconst_cred, nr); +} + +/* + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. If %NULL is passed, it is returned with no action. + * + * This is used to deal with a committed set of credentials. + */ +static inline const struct cred *get_cred(const struct cred *cred) +{ + return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) @@ -260,9 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -270,6 +249,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release + * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. @@ -278,17 +258,28 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ -static inline void put_cred(const struct cred *_cred) +static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } +/* + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. If %NULL is passed, nothing is done. + */ +static inline void put_cred(const struct cred *cred) +{ + put_cred_many(cred, 1); +} + /** * current_cred - Access the current task's subjective credentials * diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 31f6fee0c36c..b164da5e129e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_SIG 0x00000007 @@ -35,8 +36,6 @@ #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 @@ -111,7 +110,6 @@ * crypto_aead_walksize() (with the remainder going at the end), no chunk * can cross a page boundary or a scatterlist element boundary. * ahash: - * - The result buffer must be aligned to the algorithm's alignmask. * - crypto_ahash_finup() must not be used unless the algorithm implements * ->finup() natively. */ @@ -279,18 +277,20 @@ struct compress_alg { * @cra_ctxsize: Size of the operational context of the transformation. This * value informs the kernel crypto API about the memory size * needed to be allocated for the transformation context. - * @cra_alignmask: Alignment mask for the input and output data buffer. The data - * buffer containing the input data for the algorithm must be - * aligned to this alignment mask. The data buffer for the - * output data must be aligned to this alignment mask. Note that - * the Crypto API will do the re-alignment in software, but - * only under special conditions and there is a performance hit. - * The re-alignment happens at these occasions for different - * @cra_u types: cipher -- For both input data and output data - * buffer; ahash -- For output hash destination buf; shash -- - * For output hash destination buf. - * This is needed on hardware which is flawed by design and - * cannot pick data from arbitrary addresses. + * @cra_alignmask: For cipher, skcipher, lskcipher, and aead algorithms this is + * 1 less than the alignment, in bytes, that the algorithm + * implementation requires for input and output buffers. When + * the crypto API is invoked with buffers that are not aligned + * to this alignment, the crypto API automatically utilizes + * appropriately aligned temporary buffers to comply with what + * the algorithm needs. (For scatterlists this happens only if + * the algorithm uses the skcipher_walk helper functions.) This + * misalignment handling carries a performance penalty, so it is + * preferred that algorithms do not set a nonzero alignmask. + * Also, crypto API users may wish to allocate buffers aligned + * to the alignmask of the algorithm being used, in order to + * avoid the API having to realign them. Note: the alignmask is + * not supported for hash algorithms and is always 0 for them. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h new file mode 100644 index 000000000000..03fa6d50d46f --- /dev/null +++ b/include/linux/cxl-event.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. */ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_generic { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +union cxl_event { + struct cxl_event_generic generic; + struct cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + +enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index d5d4d19928e0..886d07294f4e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -2,7 +2,7 @@ /* * DAMON api * - * Author: SeongJae Park <sjpark@amazon.de> + * Author: SeongJae Park <sj@kernel.org> */ #ifndef _DAMON_H_ @@ -40,9 +40,24 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). + * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -52,6 +67,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; @@ -111,10 +127,55 @@ enum damos_action { }; /** + * enum damos_quota_goal_metric - Represents the metric to be used as the goal + * + * @DAMOS_QUOTA_USER_INPUT: User-input value. + * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. + * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. + * + * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. + */ +enum damos_quota_goal_metric { + DAMOS_QUOTA_USER_INPUT, + DAMOS_QUOTA_SOME_MEM_PSI_US, + NR_DAMOS_QUOTA_GOAL_METRICS, +}; + +/** + * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal. + * @metric: Metric to be used for representing the goal. + * @target_value: Target value of @metric to achieve with the tuning. + * @current_value: Current value of @metric. + * @last_psi_total: Last measured total PSI + * @list: List head for siblings. + * + * Data structure for getting the current score of the quota tuning goal. The + * score is calculated by how close @current_value and @target_value are. Then + * the score is entered to DAMON's internal feedback loop mechanism to get the + * auto-tuned quota. + * + * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually + * entered by the user, probably inside the kdamond callbacks. Otherwise, + * DAMON sets @current_value with self-measured value of @metric. + */ +struct damos_quota_goal { + enum damos_quota_goal_metric metric; + unsigned long target_value; + unsigned long current_value; + /* metric-dependent fields */ + union { + u64 last_psi_total; + }; + struct list_head list; +}; + +/** * struct damos_quota - Controls the aggressiveness of the given scheme. + * @reset_interval: Charge reset interval in milliseconds. * @ms: Maximum milliseconds that the scheme can use. * @sz: Maximum bytes of memory that the action can be applied. - * @reset_interval: Charge reset interval in milliseconds. + * @goals: Head of quota tuning goals (&damos_quota_goal) list. + * @esz: Effective size quota in bytes. * * @weight_sz: Weight of the region's size for prioritization. * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. @@ -132,6 +193,13 @@ enum damos_action { * throughput of the scheme's action. DAMON then compares it against &sz and * uses smaller one as the effective quota. * + * If @goals is not empt, DAMON calculates yet another size quota based on the + * goals using its internal feedback loop algorithm, for every @reset_interval. + * Then, if the new size quota is smaller than the effective quota, it uses the + * new size quota as the effective quota. + * + * The resulting effective size quota in bytes is set to @esz. + * * For selecting regions within the quota, DAMON prioritizes current scheme's * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, @@ -139,9 +207,11 @@ enum damos_action { * encouraged to respect those. */ struct damos_quota { + unsigned long reset_interval; unsigned long ms; unsigned long sz; - unsigned long reset_interval; + struct list_head goals; + unsigned long esz; unsigned int weight_sz; unsigned int weight_nr_accesses; @@ -152,8 +222,6 @@ struct damos_quota { unsigned long total_charged_sz; unsigned long total_charged_ns; - unsigned long esz; /* Effective size quota in bytes */ - /* For charging the quota */ unsigned long charged_sz; unsigned long charged_from; @@ -163,6 +231,9 @@ struct damos_quota { /* For prioritization */ unsigned long histogram[DAMOS_MAX_SCORE + 1]; unsigned int min_score; + + /* For feedback loop */ + unsigned long esz_bp; }; /** @@ -226,16 +297,26 @@ struct damos_stat { * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. + * @DAMOS_FILTER_TYPE_ADDR: Address range. + * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. * - * The support of each filter type is up to running &struct damon_operations. - * &enum DAMON_OPS_PADDR is supporting all filter types, while - * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR are not supporting any - * filter types. + * The anon pages type and memcg type filters are handled by underlying + * &struct damon_operations as a part of scheme action trying, and therefore + * accounted as 'tried'. In contrast, other types are handled by core layer + * before trying of the action and therefore not accounted as 'tried'. + * + * The support of the filters that handled by &struct damon_operations depend + * on the running &struct damon_operations. + * &enum DAMON_OPS_PADDR supports both anon pages type and memcg type filters, + * while &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR don't support any of + * the two types. */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, DAMOS_FILTER_TYPE_MEMCG, + DAMOS_FILTER_TYPE_ADDR, + DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, }; @@ -244,18 +325,24 @@ enum damos_filter_type { * @type: Type of the page. * @matching: If the matching page should filtered out or in. * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. + * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR. + * @target_idx: Index of the &struct damon_target of + * &damon_ctx->adaptive_targets if @type is + * DAMOS_FILTER_TYPE_TARGET. * @list: List head for siblings. * * Before applying the &damos->action to a memory region, DAMOS checks if each * page of the region matches to this and avoid applying the action if so. - * Note that the check support is up to &struct damon_operations - * implementation. + * Support of each filter type depends on the running &struct damon_operations + * and the type. Refer to &enum damos_filter_type for more detai. */ struct damos_filter { enum damos_filter_type type; bool matching; union { unsigned short memcg_id; + struct damon_addr_range addr_range; + int target_idx; }; struct list_head list; }; @@ -282,24 +369,24 @@ struct damos_access_pattern { * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. + * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * - * For each aggregation interval, DAMON finds regions which fit in the + * For each @apply_interval_us, DAMON finds regions which fit in the * &pattern and applies &action to those. To avoid consuming too much * CPU time or IO resources for the &action, "a is used. * + * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. + * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * - * If all schemes that registered to a &struct damon_ctx are inactive, DAMON - * stops monitoring and just repeatedly checks the watermarks. - * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters @@ -311,6 +398,14 @@ struct damos_access_pattern { struct damos { struct damos_access_pattern pattern; enum damos_action action; + unsigned long apply_interval_us; +/* private: internal use only */ + /* + * number of sample intervals that should be passed before applying + * @action + */ + unsigned long next_apply_sis; +/* public: */ struct damos_quota quota; struct damos_watermarks wmarks; struct list_head filters; @@ -456,13 +551,14 @@ struct damon_callback { * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or - * not. It aggregates and keeps the access information (number of accesses to - * each region) for @aggr_interval time. DAMON also checks whether the target - * memory regions need update (e.g., by ``mmap()`` calls from the application, - * in case of virtual memory monitoring) and applies the changes for each - * @ops_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_operations and &struct damon_callback for more - * detail. + * not during the last @sample_interval. If such access is found, DAMON + * aggregates the information by increasing &damon_region->nr_accesses for + * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON + * also checks whether the target memory regions need update (e.g., by + * ``mmap()`` calls from the application, in case of virtual memory monitoring) + * and applies the changes for each @ops_update_interval. All time intervals + * are in micro-seconds. Please refer to &struct damon_operations and &struct + * damon_callback for more detail. */ struct damon_attrs { unsigned long sample_interval; @@ -506,8 +602,20 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; @@ -567,6 +675,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_scheme_safe(s, next, ctx) \ list_for_each_entry_safe(s, next, &(ctx)->schemes, list) +#define damos_for_each_quota_goal(goal, quota) \ + list_for_each_entry(goal, "a->goals, list) + +#define damos_for_each_quota_goal_safe(goal, next, quota) \ + list_for_each_entry_safe(goal, next, &(quota)->goals, list) + #define damos_for_each_filter(f, scheme) \ list_for_each_entry(f, &(scheme)->filters, list) @@ -592,14 +706,24 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); +struct damos_quota_goal *damos_new_quota_goal( + enum damos_quota_goal_metric metric, + unsigned long target_value); +void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g); +void damos_destroy_quota_goal(struct damos_quota_goal *goal); + struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); @@ -626,6 +750,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; } +static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) +{ + /* {aggr,sample}_interval are unsigned long, hence could overflow */ + return min(attrs->aggr_interval / attrs->sample_interval, + (unsigned long)UINT_MAX); +} + int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); diff --git a/include/linux/dax.h b/include/linux/dax.h index 261944ec0887..9d3e3327af4c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -63,6 +63,8 @@ void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev); +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); void set_dax_synchronous(struct dax_device *dax_dev); size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); @@ -86,11 +88,7 @@ static inline void *dax_holder(struct dax_device *dax_dev) static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { - /* - * Callers should check IS_ENABLED(CONFIG_DAX) to know if this - * NULL is an error or expected. - */ - return NULL; + return ERR_PTR(-EOPNOTSUPP); } static inline void put_dax(struct dax_device *dax_dev) { @@ -109,6 +107,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev) { return true; } +static inline void set_dax_nocache(struct dax_device *dax_dev) +{ +} +static inline void set_dax_nomc(struct dax_device *dax_dev) +{ +} static inline void set_dax_synchronous(struct dax_device *dax_dev) { } @@ -124,9 +128,6 @@ static inline size_t dax_recovery_write(struct dax_device *dax_dev, } #endif -void set_dax_nocache(struct dax_device *dax_dev); -void set_dax_nomc(struct dax_device *dax_dev); - struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); @@ -159,8 +160,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_folio(struct folio *folio); +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page **page); void dax_unlock_mapping_entry(struct address_space *mapping, @@ -182,14 +183,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline dax_entry_t dax_lock_page(struct page *page) +static inline dax_entry_t dax_lock_folio(struct folio *folio) { - if (IS_DAX(page->mapping->host)) + if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } -static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) +static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } @@ -241,10 +242,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, +vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, - enum page_entry_size pe_size, pfn_t pfn); + unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..bf53e3894aae 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -68,12 +68,12 @@ extern const struct qstr dotdot_name; * large memory footprint increase). */ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 32 /* 192 bytes */ +# define DNAME_INLINE_LEN 40 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ -# else # define DNAME_INLINE_LEN 40 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 44 /* 128 bytes */ # endif #endif @@ -101,8 +101,8 @@ struct dentry { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; - struct list_head d_child; /* child of parent list */ - struct list_head d_subdirs; /* our children */ + struct hlist_node d_sib; /* child of parent list */ + struct hlist_head d_children; /* our children */ /* * d_alias and d_rcu can share memory */ @@ -111,7 +111,7 @@ struct dentry { struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; -} __randomize_layout; +}; /* * dentry->d_lock spinlock nesting subclasses: @@ -125,6 +125,11 @@ enum dentry_d_lock_class DENTRY_D_LOCK_NESTED }; +enum d_real_type { + D_REAL_DATA, + D_REAL_METADATA, +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); @@ -139,7 +144,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* @@ -151,13 +156,13 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH 0x00000001 -#define DCACHE_OP_COMPARE 0x00000002 -#define DCACHE_OP_REVALIDATE 0x00000004 -#define DCACHE_OP_DELETE 0x00000008 -#define DCACHE_OP_PRUNE 0x00000010 +#define DCACHE_OP_HASH BIT(0) +#define DCACHE_OP_COMPARE BIT(1) +#define DCACHE_OP_REVALIDATE BIT(2) +#define DCACHE_OP_DELETE BIT(3) +#define DCACHE_OP_PRUNE BIT(4) -#define DCACHE_DISCONNECTED 0x00000020 +#define DCACHE_DISCONNECTED BIT(5) /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first @@ -168,50 +173,47 @@ struct dentry_operations { * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ -#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ +#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ -#define DCACHE_DONTCACHE 0x00000080 /* Purge from memory on final dput() */ +#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ -#define DCACHE_CANT_MOUNT 0x00000100 -#define DCACHE_GENOCIDE 0x00000200 -#define DCACHE_SHRINK_LIST 0x00000400 +#define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) +#define DCACHE_SHRINK_LIST BIT(10) -#define DCACHE_OP_WEAK_REVALIDATE 0x00000800 +#define DCACHE_OP_WEAK_REVALIDATE BIT(11) -#define DCACHE_NFSFS_RENAMED 0x00001000 +#define DCACHE_NFSFS_RENAMED BIT(12) /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ -#define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000 +#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14) /* Parent inode is watched by some fsnotify listener */ -#define DCACHE_DENTRY_KILLED 0x00008000 +#define DCACHE_DENTRY_KILLED BIT(15) -#define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */ +#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */ +#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */ +#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST 0x00080000 +#define DCACHE_LRU_LIST BIT(19) -#define DCACHE_ENTRY_TYPE 0x00700000 -#define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry (maybe fallthru to nowhere) */ -#define DCACHE_WHITEOUT_TYPE 0x00100000 /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE 0x00200000 /* Normal directory */ -#define DCACHE_AUTODIR_TYPE 0x00300000 /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE 0x00400000 /* Regular file type (or fallthru to such) */ -#define DCACHE_SPECIAL_TYPE 0x00500000 /* Other file type (or fallthru to such) */ -#define DCACHE_SYMLINK_TYPE 0x00600000 /* Symlink (or fallthru to such) */ +#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */ +#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */ +#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */ +#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */ +#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */ +#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */ +#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */ +#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */ -#define DCACHE_MAY_FREE 0x00800000 -#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL 0x04000000 +#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */ +#define DCACHE_OP_REAL BIT(26) -#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR 0x20000000 -#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */ +#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR BIT(29) +#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */ extern seqlock_t rename_lock; @@ -220,8 +222,6 @@ extern seqlock_t rename_lock; */ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); -extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); -extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); @@ -242,15 +242,12 @@ extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); -extern void shrink_dcache_for_umount(struct super_block *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); -/* <clickety>-<click> the ramfs-type tree */ -extern void d_genocide(struct dentry *); - +extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); @@ -273,12 +270,8 @@ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); -/* appendix may either be NULL or be used for transname suffixes */ extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *__d_lookup_rcu(const struct dentry *parent, - const struct qstr *name, unsigned *seq); static inline unsigned d_count(const struct dentry *dentry) { @@ -300,20 +293,40 @@ extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_dlock - get a reference to a dentry - * @dentry: dentry to get a reference to + * dget_dlock - get a reference to a dentry + * @dentry: dentry to get a reference to * - * Given a dentry or %NULL pointer increment the reference count - * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. + * Given a live dentry, increment the reference count and return the dentry. + * Caller must hold @dentry->d_lock. Making sure that dentry is alive is + * caller's resonsibility. There are many conditions sufficient to guarantee + * that; e.g. anything with non-negative refcount is alive, so's anything + * hashed, anything positive, anyone's parent, etc. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { - if (dentry) - dentry->d_lockref.count++; + dentry->d_lockref.count++; return dentry; } + +/** + * dget - get a reference to a dentry + * @dentry: dentry to get a reference to + * + * Given a dentry or %NULL pointer increment the reference count + * if appropriate and return the dentry. A dentry will not be + * destroyed when it has references. Conversely, a dentry with + * no references can disappear for any number of reasons, starting + * with memory pressure. In other words, that primitive is + * used to clone an existing reference; using it on something with + * zero refcount is a bug. + * + * NOTE: it will spin if @dentry->d_lock is held. From the deadlock + * avoidance point of view it is equivalent to spin_lock()/increment + * refcount/spin_unlock(), so calling it under @dentry->d_lock is + * always a bug; so's calling it under ->d_lock on any of its descendents. + * + */ static inline struct dentry *dget(struct dentry *dentry) { if (dentry) @@ -324,12 +337,11 @@ static inline struct dentry *dget(struct dentry *dentry) extern struct dentry *dget_parent(struct dentry *dentry); /** - * d_unhashed - is dentry hashed - * @dentry: entry to check + * d_unhashed - is dentry hashed + * @dentry: entry to check * - * Returns true if the dentry passed is not currently hashed. + * Returns true if the dentry passed is not currently hashed. */ - static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); @@ -489,14 +501,6 @@ static inline int simple_positive(const struct dentry *dentry) return d_really_is_positive(dentry) && !d_unhashed(dentry); } -extern void d_set_fallthru(struct dentry *dentry); - -static inline bool d_is_fallthru(const struct dentry *dentry) -{ - return dentry->d_flags & DCACHE_FALLTHRU; -} - - extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val) @@ -546,41 +550,25 @@ static inline struct inode *d_backing_inode(const struct dentry *upper) } /** - * d_backing_dentry - Get upper or lower dentry we should be using - * @upper: The upper layer - * - * This is the helper that should be used to get the dentry of the inode that - * will be used if this dentry were opened as a file. It may be the upper - * dentry or it may be a lower dentry pinned by the upper. - * - * Normal filesystems should not use this to access their own dentries. - */ -static inline struct dentry *d_backing_dentry(struct dentry *upper) -{ - return upper; -} - -/** * d_real - Return the real dentry * @dentry: the dentry to query - * @inode: inode to select the dentry from multiple layers (can be NULL) + * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ -static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode) +static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode); + return dentry->d_op->d_real(dentry, type); else return dentry; } /** - * d_real_inode - Return the real inode + * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. @@ -589,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL)); + return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { @@ -599,4 +587,14 @@ struct name_snapshot { void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); +static inline struct dentry *d_first_child(const struct dentry *dentry) +{ + return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib); +} + +static inline struct dentry *d_next_sibling(const struct dentry *dentry) +{ + return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ea2d919fd9c7..c9c65b132c0f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos); +/** + * struct debugfs_cancellation - cancellation data + * @list: internal, for keeping track + * @cancel: callback to call + * @cancel_data: extra data for the callback to call + */ +struct debugfs_cancellation { + struct list_head list; + void (*cancel)(struct dentry *, void *); + void *cancel_data; +}; + +void __acquires(cancellation) +debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); +void __releases(cancellation) +debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation); + #else #include <linux/err.h> diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1a73..ac862422df15 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 8904063d4c9f..6bfe70decc9f 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -274,4 +274,6 @@ do { \ WARN_ONCE(condition, "%s %s: " format, \ dev_driver_string(dev), dev_name(dev), ## arg) +__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); + #endif /* _DEVICE_PRINTK_H_ */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 69d0435c7ebb..82b2195efaca 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,6 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; + struct file *bdev_file; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; diff --git a/include/linux/device.h b/include/linux/device.h index bbaeabd04b0d..b9f5464f44ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,7 +42,6 @@ struct class; struct subsys_private; struct device_node; struct fwnode_handle; -struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; @@ -63,7 +62,7 @@ struct msi_device_data; */ struct subsys_interface { const char *name; - struct bus_type *subsys; + const struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); void (*remove_dev)(struct device *dev, struct subsys_interface *sif); @@ -72,9 +71,9 @@ struct subsys_interface { int subsys_interface_register(struct subsys_interface *sif); void subsys_interface_unregister(struct subsys_interface *sif); -int subsys_system_register(struct bus_type *subsys, +int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups); -int subsys_virtual_register(struct bus_type *subsys, +int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups); /* @@ -349,6 +348,7 @@ unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); void devm_free_pages(struct device *dev, unsigned long addr); +#ifdef CONFIG_HAS_IOMEM void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); void __iomem *devm_ioremap_resource_wc(struct device *dev, @@ -357,14 +357,39 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev, void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, resource_size_t *size); +#else + +static inline +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res) +{ + return ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_ioremap_resource_wc(struct device *dev, + const struct resource *res) +{ + return ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_of_iomap(struct device *dev, + struct device_node *node, int index, + resource_size_t *size) +{ + return ERR_PTR(-EINVAL); +} + +#endif /* allows to add/remove a custom action to devres stack */ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(release, action, data) \ - __devm_add_action(release, action, data, #action) +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) @@ -377,8 +402,8 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( return ret; } -#define devm_add_action_or_reset(release, action, data) \ - __devm_add_action_or_reset(release, action, data, #action) +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu @@ -625,7 +650,10 @@ struct device_physical_location { * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations - * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use. + * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. + * @dma_io_tlb_pools: List of transient swiotlb memory pools. + * @dma_io_tlb_lock: Protects changes to the list of active pools. + * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -633,7 +661,6 @@ struct device_physical_location { * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. - * @knode_class: The node used to add the device to the class list. * @class: The class of the device. * @groups: Optional attribute groups. * @release: Callback to free the device after all references have @@ -732,6 +759,11 @@ struct device { #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; #endif +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head dma_io_tlb_pools; + spinlock_t dma_io_tlb_lock; + bool dma_uses_io_tlb; +#endif /* arch specific additions */ struct dev_archdata archdata; @@ -973,6 +1005,8 @@ static inline void device_unlock(struct device *dev) mutex_unlock(&dev->mutex); } +DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) + static inline void device_lock_assert(struct device *dev) { lockdep_assert_held(&dev->mutex); @@ -1037,7 +1071,6 @@ int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); -int device_is_dependent(struct device *dev, void *target); static inline bool device_supports_offline(struct device *dev) { @@ -1214,8 +1247,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); - -__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index ae10c4322754..5ef4ec1c36c3 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -62,9 +62,6 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. - * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU - * driver implementations to a bus and allow the driver to do - * bus-specific setup * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. * @@ -104,8 +101,6 @@ struct bus_type { const struct dev_pm_ops *pm; - const struct iommu_ops *iommu_ops; - bool need_parent_lock; }; @@ -232,7 +227,7 @@ bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev) int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); -void bus_sort_breadthfirst(struct bus_type *bus, +void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)); /* diff --git a/include/linux/device/class.h b/include/linux/device/class.h index abf3d3bfb6fe..c576b49c55c2 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -40,8 +40,6 @@ struct fwnode_handle; * for the devices belonging to the class. Usually tied to * device's namespace. * @pm: The default device power management operations of this class. - * @p: The private data of the driver core, no one other than the - * driver core can touch this. * * A class is a higher-level view of a device that abstracts out low-level * implementation details. Drivers may see a SCSI disk or an ATA disk, but, diff --git a/include/linux/dio.h b/include/linux/dio.h index 5abd07361eb5..2b5923909f96 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -68,7 +68,7 @@ struct dio_bus { }; extern struct dio_bus dio_bus; /* Single DIO bus */ -extern struct bus_type dio_bus_type; +extern const struct bus_type dio_bus_type; /* * DIO device IDs diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h index e6d76e8715a6..15fc856d198c 100644 --- a/include/linux/dlm_plock.h +++ b/include/linux/dlm_plock.h @@ -11,6 +11,8 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl); int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); +int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl); int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl); #endif diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb532..d1503b815a78 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio); + /* * Like dm_bufio_read, but return buffer from cache, don't read * it. If the buffer is not in the cache, return NULL. @@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks); +void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, + unsigned short ioprio); + /* * Release a reference obtained with dm_bufio_{read,get,new}. The data * pointer and dm_buffer pointer is no longer valid after this call. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 7595142f3fc5..7b2968612b7e 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h index 552b817ab102..3ac6dbaeaa37 100644 --- a/include/linux/dm-verity-loadpin.h +++ b/include/linux/dm-verity-loadpin.h @@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests; struct dm_verity_loadpin_trusted_root_digest { struct list_head node; unsigned int len; - u8 data[]; + u8 data[] __counted_by(len); }; #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3f31baa3293f..8ff4add71f88 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -343,16 +343,19 @@ struct dma_buf { /** * @exp_name: * - * Name of the exporter; useful for debugging. See the - * DMA_BUF_SET_NAME IOCTL. + * Name of the exporter; useful for debugging. Must not be NULL */ const char *exp_name; /** * @name: * - * Userspace-provided name; useful for accounting and debugging, - * protected by dma_resv_lock() on @resv and @name_lock for read access. + * Userspace-provided name. Default value is NULL. If not NULL, + * length cannot be longer than DMA_BUF_NAME_LEN, including NIL + * char. Useful for accounting and debugging. Read/Write accesses + * are protected by @name_lock + * + * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B */ const char *name; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 18aade195884..3eb3589ff43e 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -21,7 +21,6 @@ struct bus_dma_region { phys_addr_t cpu_start; dma_addr_t dma_start; u64 size; - u64 offset; }; static inline dma_addr_t translate_phys_to_dma(struct device *dev, @@ -29,9 +28,12 @@ static inline dma_addr_t translate_phys_to_dma(struct device *dev, { const struct bus_dma_region *m; - for (m = dev->dma_range_map; m->size; m++) - if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) - return (dma_addr_t)paddr - m->offset; + for (m = dev->dma_range_map; m->size; m++) { + u64 offset = paddr - m->cpu_start; + + if (paddr >= m->cpu_start && offset < m->size) + return m->dma_start + offset; + } /* make sure dma_capable fails when no translation is available */ return DMA_MAPPING_ERROR; @@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev, { const struct bus_dma_region *m; - for (m = dev->dma_range_map; m->size; m++) - if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) - return (phys_addr_t)dma_addr + m->offset; + for (m = dev->dma_range_map; m->size; m++) { + u64 offset = dma_addr - m->dma_start; + + if (dma_addr >= m->dma_start && offset < m->size) + return m->cpu_start + offset; + } return (phys_addr_t)-1; } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..e06bad467f55 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/printk.h> #include <linux/rcupdate.h> +#include <linux/timekeeping.h> struct dma_fence; struct dma_fence_ops; @@ -499,6 +500,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, } /** + * dma_fence_is_later_or_same - return true if f1 is later or same as f2 + * @f1: the first fence from the same context + * @f2: the second fence from the same context + * + * Returns true if f1 is chronologically later than f2 or the same fence. Both + * fences must be from the same context, since a seqno is not re-used across + * contexts. + */ +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} + +/** * dma_fence_later - return the chronologically later fence * @f1: the first fence from the same context * @f2: the second fence from the same context @@ -568,6 +584,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9bf19b5bf755..4abc60f04209 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -11,6 +11,7 @@ #include <linux/slab.h> struct cma; +struct iommu_ops; /* * Values for struct dma_map_ops.flags: @@ -169,12 +170,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, } #endif /* CONFIG_DMA_CMA*/ -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif /* CONFIG_DMA_PERNUMA_CMA */ - #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); @@ -343,6 +338,12 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK +void arch_dma_set_mask(struct device *dev, u64 mask); +#else +#define arch_dma_set_mask(dev, mask) do { } while (0) +#endif + #ifdef CONFIG_MMU /* * Page protection so that devices that can't snoop CPU caches can use the @@ -426,10 +427,10 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); + bool coherent); #else static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) + u64 size, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ @@ -443,10 +444,10 @@ static inline void arch_teardown_dma_ops(struct device *dev) #endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ #ifdef CONFIG_DMA_API_DEBUG -void dma_debug_add_bus(struct bus_type *bus); +void dma_debug_add_bus(const struct bus_type *bus); void debug_dma_dump_mappings(struct device *dev); #else -static inline void dma_debug_add_bus(struct bus_type *bus) +static inline void dma_debug_add_bus(const struct bus_type *bus) { } static inline void debug_dma_dump_mappings(struct device *dev) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e13050eb9777..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct device *dev) { return 0; @@ -418,6 +423,8 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); + static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -463,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index e443be4d3b4b..1e491c5dcac2 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -26,6 +26,11 @@ struct k3_udma_glue_tx_channel; struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev, const char *name, struct k3_udma_glue_tx_channel_cfg *cfg); +struct k3_udma_glue_tx_channel * +k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_tx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, struct cppi5_host_desc_t *desc_tx, @@ -109,6 +114,11 @@ struct k3_udma_glue_rx_channel *k3_udma_glue_request_rx_chn( const char *name, struct k3_udma_glue_rx_channel_cfg *cfg); +struct k3_udma_glue_rx_channel * +k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_rx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c3656e590213..752dbde4cec1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan) return dev_name(&chan->dev->device); } -void dma_chan_cleanup(struct kref *kref); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed @@ -955,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 27dbd4c64860..e34b601b71fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -106,8 +106,6 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern void dmar_register_bus_notifier(void); -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt); extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index b1d26f9f1c9f..9f183a679277 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -30,7 +30,7 @@ struct dnotify_struct { FS_MOVED_FROM | FS_MOVED_TO) extern void dnotify_flush(struct file *, fl_owner_t); -extern int fcntl_dirnotify(int, struct file *, unsigned long); +extern int fcntl_dirnotify(int, struct file *, unsigned int); #else @@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id) { } -static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) +static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) { return -EINVAL; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..d275736230b3 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include <uapi/linux/dpll.h> +#include <linux/device.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + enum dpll_lock_status_error *status_error, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); + int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *ffo, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; +}; + +#if IS_ENABLED(CONFIG_DPLL) +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); +#else +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return 0; +} + +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) +{ + return 0; +} +#endif + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + +#endif diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index c177322f793d..b9dd35d4b8f5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -28,7 +28,7 @@ /* Source and Destination MAC of follow-up meta frames. * Whereas the choice of SMAC only affects the unique identification of the * switch as sender of meta frames, the DMAC must be an address that is present - * in the DSA master port's multicast MAC filter. + * in the DSA conduit port's multicast MAC filter. * 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588 * over L2 use this address for some purpose already. */ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 061dd84d09f3..4fcbf4d4fd0a 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -37,10 +37,12 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID (1<<4) +#define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5) #define _DPRINTK_FLAGS_INCL_ANY \ (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\ - _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID) + _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\ + _DPRINTK_FLAGS_INCL_SOURCENAME) #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 407c2f281b64..5693a4be0d9a 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,14 +38,22 @@ #ifdef __KERNEL__ +#include <linux/bitops.h> #include <asm/bug.h> +#define DQL_HIST_LEN 4 +#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + unsigned long history_head; /* top 58 bits of jiffies */ + /* stall entries, a bit per entry */ + unsigned long history[DQL_HIST_LEN]; + /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ @@ -62,6 +70,13 @@ struct dql { unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ + + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + /* Longest stall detected, reported to user */ + unsigned short stall_max; + unsigned long last_reap; /* Last reap (in jiffies) */ + unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ @@ -74,6 +89,8 @@ struct dql { */ static inline void dql_queued(struct dql *dql, unsigned int count) { + unsigned long map, now, now_hi, i; + BUG_ON(count > DQL_MAX_OBJECT); dql->last_obj_cnt = count; @@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count) barrier(); dql->num_queued += count; + + now = jiffies; + now_hi = now / BITS_PER_LONG; + + /* The following code set a bit in the ring buffer, where each + * bit trackes time the packet was queued. The dql->history buffer + * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot + */ + if (unlikely(now_hi != dql->history_head)) { + /* About to reuse slots, clear them */ + for (i = 0; i < DQL_HIST_LEN; i++) { + /* Multiplication masks high bits */ + if (now_hi * BITS_PER_LONG == + (dql->history_head + i) * BITS_PER_LONG) + break; + DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; + } + /* pairs with smp_rmb() in dql_check_stall() */ + smp_wmb(); + WRITE_ONCE(dql->history_head, now_hi); + } + + /* __set_bit() does not guarantee WRITE_ONCE() semantics */ + map = DQL_HIST_ENT(dql, now_hi); + + /* Populate the history with an entry (bit) per queued */ + if (!(map & BIT_MASK(now))) + WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* Returns how many objects can be queued, < 0 indicates over limit. */ diff --git a/include/linux/edac.h b/include/linux/edac.h index fa4bda2a70f6..b4ee8961e623 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,7 +30,7 @@ struct device; extern int edac_op_state; -struct bus_type *edac_get_sysfs_subsys(void); +const struct bus_type *edac_get_sysfs_subsys(void); static inline void opstate_init(void) { @@ -187,6 +187,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. * @MEM_HBM2: High bandwidth Memory Gen 2. + * @MEM_HBM3: High bandwidth Memory Gen 3. */ enum mem_type { MEM_EMPTY = 0, @@ -218,6 +219,7 @@ enum mem_type { MEM_NVDIMM, MEM_WIO2, MEM_HBM2, + MEM_HBM3, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -248,6 +250,7 @@ enum mem_type { #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) +#define MEM_FLAG_HBM3 BIT(MEM_HBM3) /** * enum edac_type - Error Detection and Correction capabilities and mode @@ -492,7 +495,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type *bus; + const struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ diff --git a/include/linux/efi.h b/include/linux/efi.h index ab088c662e88..d59b0947fba0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -24,10 +24,11 @@ #include <linux/range.h> #include <linux/reboot.h> #include <linux/uuid.h> -#include <linux/screen_info.h> #include <asm/page.h> +struct screen_info; + #define EFI_SUCCESS 0 #define EFI_LOAD_ERROR ( 1 | (1UL << (BITS_PER_LONG-1))) #define EFI_INVALID_PARAMETER ( 2 | (1UL << (BITS_PER_LONG-1))) @@ -39,6 +40,7 @@ #define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) #define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_ACCESS_DENIED (15 | (1UL << (BITS_PER_LONG-1))) #define EFI_TIMEOUT (18 | (1UL << (BITS_PER_LONG-1))) #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) @@ -357,13 +359,10 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. */ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) -#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) -#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -387,6 +386,7 @@ void efi_native_runtime_setup(void); #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) +#define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) @@ -401,6 +401,8 @@ void efi_native_runtime_setup(void); #define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72) #define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed) #define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) +#define EFI_CC_MEASUREMENT_PROTOCOL_GUID EFI_GUID(0x96751a3d, 0x72f4, 0x41a6, 0xa7, 0x94, 0xed, 0x5d, 0x0e, 0x67, 0xae, 0x6b) +#define EFI_CC_FINAL_EVENTS_TABLE_GUID EFI_GUID(0xdd4a4648, 0x2de7, 0x4665, 0x96, 0x4d, 0x21, 0xd9, 0xef, 0x5f, 0xb4, 0x46) /* * This GUID is used to pass to the kernel proper the struct screen_info @@ -412,7 +414,6 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) @@ -693,6 +694,11 @@ extern struct efi { extern struct mm_struct efi_mm; +static inline bool mm_is_efi(struct mm_struct *mm) +{ + return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm; +} + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { @@ -726,7 +732,6 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, return EFI_SUCCESS; } #endif -extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init __efi_memmap_init(struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); @@ -851,10 +856,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } -#ifdef CONFIG_EFI_PCDP -extern int __init efi_setup_pcdp_console(char *); -#endif - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. @@ -1130,7 +1131,7 @@ extern bool efi_runtime_disabled(void); static inline bool efi_runtime_disabled(void) { return true; } #endif -extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +extern void efi_call_virt_check_flags(unsigned long flags, const void *caller); extern unsigned long efi_call_virt_save_flags(void); enum efi_secureboot_mode { @@ -1171,8 +1172,7 @@ static inline void efi_check_for_embedded_firmwares(void) { } #define arch_efi_call_virt(p, f, args...) ((p)->f(args)) /* - * Arch code can implement the following three template macros, avoiding - * reptition for the void/non-void return cases of {__,}efi_call_virt(): + * Arch code must implement the following three routines: * * * arch_efi_call_virt_setup() * @@ -1181,9 +1181,8 @@ static inline void efi_check_for_embedded_firmwares(void) { } * * * arch_efi_call_virt() * - * Performs the call. The last expression in the macro must be the call - * itself, allowing the logic to be shared by the void and non-void - * cases. + * Performs the call. This routine takes a variable number of arguments so + * it must be implemented as a variadic preprocessor macro. * * * arch_efi_call_virt_teardown() * @@ -1192,33 +1191,20 @@ static inline void efi_check_for_embedded_firmwares(void) { } #define efi_call_virt_pointer(p, f, args...) \ ({ \ - efi_status_t __s; \ + typeof((p)->f(args)) __s; \ unsigned long __flags; \ \ arch_efi_call_virt_setup(); \ \ __flags = efi_call_virt_save_flags(); \ __s = arch_efi_call_virt(p, f, args); \ - efi_call_virt_check_flags(__flags, __stringify(f)); \ + efi_call_virt_check_flags(__flags, NULL); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -#define __efi_call_virt_pointer(p, f, args...) \ -({ \ - unsigned long __flags; \ - \ - arch_efi_call_virt_setup(); \ - \ - __flags = efi_call_virt_save_flags(); \ - arch_efi_call_virt(p, f, args); \ - efi_call_virt_check_flags(__flags, __stringify(f)); \ - \ - arch_efi_call_virt_teardown(); \ -}) - #define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { @@ -1244,6 +1230,10 @@ extern int efi_tpm_final_log_size; extern unsigned long rci2_table_phys; +efi_status_t +efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *), + u64 param_buffer_addr, void *context); + /* * efi_runtime_service() function identifiers. * "NONE" is used by efi_recover_from_page_fault() to check if the page @@ -1263,25 +1253,26 @@ enum efi_rts_ids { EFI_RESET_SYSTEM, EFI_UPDATE_CAPSULE, EFI_QUERY_CAPSULE_CAPS, + EFI_ACPI_PRM_HANDLER, }; +union efi_rts_args; + /* * efi_runtime_work: Details of EFI Runtime Service work - * @arg<1-5>: EFI Runtime Service function arguments + * @args: Pointer to union describing the arguments * @status: Status of executing EFI Runtime Service * @efi_rts_id: EFI Runtime Service function identifier * @efi_rts_comp: Struct used for handling completions + * @caller: The caller of the runtime service */ struct efi_runtime_work { - void *arg1; - void *arg2; - void *arg3; - void *arg4; - void *arg5; - efi_status_t status; - struct work_struct work; - enum efi_rts_ids efi_rts_id; - struct completion efi_rts_comp; + union efi_rts_args *args; + efi_status_t status; + struct work_struct work; + enum efi_rts_ids efi_rts_id; + struct completion efi_rts_comp; + const void *caller; }; extern struct efi_runtime_work efi_rts_work; @@ -1365,4 +1356,15 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +/* + * efivar ops event type + */ +#define EFIVAR_OPS_RDONLY 0 +#define EFIVAR_OPS_RDWR 1 + +extern struct blocking_notifier_head efivar_ops_nh; + +void efivars_generic_ops_register(void); +void efivars_generic_ops_unregister(void); + #endif /* _LINUX_EFI_H */ diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h new file mode 100644 index 000000000000..624ff6ff41f9 --- /dev/null +++ b/include/linux/einj-cxl.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#ifndef EINJ_CXL_H +#define EINJ_CXL_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct pci_dev; +struct seq_file; + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_cxl_is_initialized(void); +#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_cxl_is_initialized(void) { return false; } +#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ + +#endif /* EINJ_CXL_H */ diff --git a/include/linux/elf-fdpic.h b/include/linux/elf-fdpic.h index 3bea95a1af53..e533f4513194 100644 --- a/include/linux/elf-fdpic.h +++ b/include/linux/elf-fdpic.h @@ -10,13 +10,25 @@ #include <uapi/linux/elf-fdpic.h> +#if ELF_CLASS == ELFCLASS32 +#define Elf_Sword Elf32_Sword +#define elf_fdpic_loadseg elf32_fdpic_loadseg +#define elf_fdpic_loadmap elf32_fdpic_loadmap +#define ELF_FDPIC_LOADMAP_VERSION ELF32_FDPIC_LOADMAP_VERSION +#else +#define Elf_Sword Elf64_Sxword +#define elf_fdpic_loadmap elf64_fdpic_loadmap +#define elf_fdpic_loadseg elf64_fdpic_loadseg +#define ELF_FDPIC_LOADMAP_VERSION ELF64_FDPIC_LOADMAP_VERSION +#endif + /* * binfmt binary parameters structure */ struct elf_fdpic_params { struct elfhdr hdr; /* ref copy of ELF header */ struct elf_phdr *phdrs; /* ref copy of PT_PHDR table */ - struct elf32_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */ + struct elf_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */ unsigned long elfhdr_addr; /* mapped ELF header user address */ unsigned long ph_addr; /* mapped PT_PHDR user address */ unsigned long map_addr; /* mapped loadmap user address */ diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b9caa01dfac4..70cd7258cd29 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/jump_label.h> #include <linux/kobject.h> +#include <linux/kref.h> #include <linux/rcupdate.h> #include <linux/sched/cpufreq.h> #include <linux/sched/topology.h> @@ -12,6 +13,7 @@ /** * struct em_perf_state - Performance state of a performance domain + * @performance: CPU performance (capacity) at a given frequency * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -20,6 +22,7 @@ * @flags: see "em_perf_state flags" description below. */ struct em_perf_state { + unsigned long performance; unsigned long frequency; unsigned long power; unsigned long cost; @@ -37,8 +40,20 @@ struct em_perf_state { #define EM_PERF_STATE_INEFFICIENT BIT(0) /** + * struct em_perf_table - Performance states table + * @rcu: RCU used for safe access and destruction + * @kref: Reference counter to track the users + * @state: List of performance states, in ascending order + */ +struct em_perf_table { + struct rcu_head rcu; + struct kref kref; + struct em_perf_state state[]; +}; + +/** * struct em_perf_domain - Performance domain - * @table: List of performance states, in ascending order + * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here @@ -53,7 +68,7 @@ struct em_perf_state { * field is unused. */ struct em_perf_domain { - struct em_perf_state *table; + struct em_perf_table __rcu *em_table; int nr_perf_states; unsigned long flags; unsigned long cpus[]; @@ -98,27 +113,6 @@ struct em_perf_domain { #define EM_MAX_NUM_CPUS 16 #endif -/* - * To avoid an overflow on 32bit machines while calculating the energy - * use a different order in the operation. First divide by the 'cpu_scale' - * which would reduce big value stored in the 'cost' field, then multiply by - * the 'sum_util'. This would allow to handle existing platforms, which have - * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. - * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' - * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. - * This reordering of operations has some limitations, we lose small - * precision in the estimation (comparing to 64bit platform w/o reordering). - * - * We are safe on 64bit machine. - */ -#ifdef CONFIG_64BIT -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) * (sum_util)) / (scale_cpu)) -#else -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) / (scale_cpu)) * (sum_util)) -#endif - struct em_data_callback { /** * active_power() - Provide power at the next performance state of @@ -168,40 +162,48 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table __rcu *table); +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM - * @pd : Performance domain for which we want an efficient frequency - * @freq : Frequency to map with the EM + * @table: List of performance states, in ascending order + * @nr_perf_states: Number of performance states + * @max_util: Max utilization to map with the EM + * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. * - * Return: An efficient performance state, high enough to meet @freq + * Return: An efficient performance state id, high enough to meet @max_util * requirement. */ -static inline -struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd, - unsigned long freq) +static inline int +em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, + unsigned long max_util, unsigned long pd_flags) { struct em_perf_state *ps; int i; - for (i = 0; i < pd->nr_perf_states; i++) { - ps = &pd->table[i]; - if (ps->frequency >= freq) { - if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && + for (i = 0; i < nr_perf_states; i++) { + ps = &table[i]; + if (ps->performance >= max_util) { + if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && ps->flags & EM_PERF_STATE_INEFFICIENT) continue; - break; + return i; } } - return ps; + return nr_perf_states - 1; } /** @@ -224,9 +226,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util, unsigned long allowed_cpu_cap) { - unsigned long freq, scale_cpu; + struct em_perf_table *em_table; struct em_perf_state *ps; - int cpu; + int i; + +#ifdef CONFIG_SCHED_DEBUG + WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); +#endif if (!sum_util) return 0; @@ -234,32 +240,29 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested - * frequency, like schedutil. Take also into account that the real - * frequency might be set lower (due to thermal capping). Thus, clamp + * performance, like schedutil. Take also into account that the real + * performance might be set lower (due to thermal capping). Thus, clamp * max utilization to the allowed CPU capacity before calculating - * effective frequency. + * effective performance. */ - cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(cpu); - ps = &pd->table[pd->nr_perf_states - 1]; - - max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); - freq = map_util_freq(max_util, ps->frequency, scale_cpu); /* * Find the lowest performance state of the Energy Model above the - * requested frequency. + * requested performance. */ - ps = em_pd_get_efficient_state(pd, freq); + em_table = rcu_dereference(pd->em_table); + i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, + max_util, pd->flags); + ps = &em_table->state[i]; /* - * The capacity of a CPU in the domain at the performance state (ps) - * can be computed as: + * The performance (capacity) of a CPU in the domain at the performance + * state (ps) can be computed as: * - * ps->freq * scale_cpu - * ps->cap = -------------------- (1) - * cpu_max_freq + * ps->freq * scale_cpu + * ps->performance = -------------------- (1) + * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in * the EM), the energy consumed by this CPU at that performance state @@ -267,9 +270,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * ps->cap + * ps->performance * - * since 'cpu_util / ps->cap' represents its percentage of busy time. + * since 'cpu_util / ps->performance' represents its percentage of busy + * time. * * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -279,9 +283,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * ps->power * cpu_max_freq cpu_util - * cpu_nrg = ------------------------ * --------- (3) - * ps->freq scale_cpu + * ps->power * cpu_max_freq + * cpu_nrg = ------------------------ * cpu_util (3) + * ps->freq * scale_cpu * * The first term is static, and is stored in the em_perf_state struct * as 'ps->cost'. @@ -291,11 +295,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * ps->cost * \Sum cpu_util - * pd_nrg = ------------------------ (4) - * scale_cpu + * pd_nrg = ps->cost * \Sum cpu_util (4) */ - return em_estimate_energy(ps->cost, sum_util, scale_cpu); + return ps->cost * sum_util; } /** @@ -310,6 +312,23 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return pd->nr_perf_states; } +/** + * em_perf_state_from_pd() - Get the performance states table of perf. + * domain + * @pd : performance domain for which this must be done + * + * To use this function the rcu_read_lock() should be hold. After the usage + * of the performance states table is finished, the rcu_read_unlock() should + * be called. + * + * Return: the pointer to performance states table of the performance domain + */ +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return rcu_dereference(pd->em_table)->state; +} + #else struct em_data_callback {}; #define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { } @@ -344,6 +363,29 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } +static inline +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +{ + return NULL; +} +static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + return -EINVAL; +} +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return NULL; +} +static inline +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return -EINVAL; +} #endif #endif diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index d95ab85f96ba..b0fb775a600d 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -7,6 +7,11 @@ #include <linux/syscalls.h> #include <linux/seccomp.h> #include <linux/sched.h> +#include <linux/context_tracking.h> +#include <linux/livepatch.h> +#include <linux/resume_user_mode.h> +#include <linux/tick.h> +#include <linux/kmsan.h> #include <asm/entry-common.h> @@ -98,7 +103,19 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} * done between establishing state and enabling interrupts. The caller must * enable interrupts before invoking syscall_enter_from_user_mode_work(). */ -void enter_from_user_mode(struct pt_regs *regs); +static __always_inline void enter_from_user_mode(struct pt_regs *regs) +{ + arch_enter_from_user_mode(regs); + lockdep_hardirqs_off(CALLER_ADDR0); + + CT_WARN_ON(__ct_state() != CONTEXT_USER); + user_exit_irqoff(); + + instrumentation_begin(); + kmsan_unpoison_entry_regs(regs); + trace_hardirqs_off_finish(); + instrumentation_end(); +} /** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts @@ -117,6 +134,9 @@ void enter_from_user_mode(struct pt_regs *regs); */ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); +long syscall_trace_enter(struct pt_regs *regs, long syscall, + unsigned long work); + /** * syscall_enter_from_user_mode_work - Check and handle work before invoking * a syscall @@ -140,7 +160,15 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ -long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); +static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + + if (work & SYSCALL_WORK_ENTER) + syscall = syscall_trace_enter(regs, syscall, work); + + return syscall; +} /** * syscall_enter_from_user_mode - Establish state and check and handle work @@ -159,7 +187,19 @@ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); * Returns: The original or a modified syscall number. See * syscall_enter_from_user_mode_work() for further explanation. */ -long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); +static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = syscall_enter_from_user_mode_work(regs, syscall); + instrumentation_end(); + + return ret; +} /** * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() @@ -259,6 +299,43 @@ static __always_inline void arch_exit_to_user_mode(void) { } void arch_do_signal_or_restart(struct pt_regs *regs); /** + * exit_to_user_mode_loop - do any pending work before leaving to user space + */ +unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + unsigned long ti_work); + +/** + * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required + * @regs: Pointer to pt_regs on entry stack + * + * 1) check that interrupts are disabled + * 2) call tick_nohz_user_enter_prepare() + * 3) call exit_to_user_mode_loop() if any flags from + * EXIT_TO_USER_MODE_WORK are set + * 4) check that interrupts are still disabled + */ +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) +{ + unsigned long ti_work; + + lockdep_assert_irqs_disabled(); + + /* Flush pending rcuog wakeup before the last need_resched() check */ + tick_nohz_user_enter_prepare(); + + ti_work = read_thread_flags(); + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) + ti_work = exit_to_user_mode_loop(regs, ti_work); + + arch_exit_to_user_mode_prepare(regs, ti_work); + + /* Ensure that kernel state is sane for a return to userspace */ + kmap_assert_nomap(); + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); +} + +/** * exit_to_user_mode - Fixup state when exiting to user mode * * Syscall/interrupt exit enables interrupts, but the kernel state is @@ -276,7 +353,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs); * non-instrumentable. * The caller has to invoke syscall_exit_to_user_mode_work() before this. */ -void exit_to_user_mode(void); +static __always_inline void exit_to_user_mode(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(); + instrumentation_end(); + + user_enter_irqoff(); + arch_exit_to_user_mode(); + lockdep_hardirqs_on(CALLER_ADDR0); +} /** * syscall_exit_to_user_mode_work - Handle work before returning to user mode diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c33..297231854ada 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -608,6 +608,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, } /** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + +/** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 62b61527bcc4..9901e563f706 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -95,6 +95,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len + * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -102,6 +103,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_TX_PUSH = BIT(2), ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), + ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -220,6 +222,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +struct ethtool_keee { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_active; + bool eee_enabled; +}; + struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; @@ -409,8 +421,10 @@ struct ethtool_pause_stats { * not entire FEC data blocks. This is a non-standard statistic. * Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS. * - * @lane: per-lane/PCS-instance counts as defined by the standard - * @total: error counts for the entire port, for drivers incapable of reporting + * For each of the above fields, the two substructure members are: + * + * - @lanes: per-lane/PCS-instance counts as defined by the standard + * - @total: error counts for the entire port, for drivers incapable of reporting * per-lane stats * * Drivers should fill in either only total or per-lane statistics, core @@ -595,9 +609,46 @@ struct ethtool_mm_stats { }; /** + * struct ethtool_rxfh_param - RXFH (RSS) parameters + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. + * @indir_size: On SET, the array size of the user buffer for the + * indirection table, which may be zero, or + * %ETH_RXFH_INDIR_NO_CHANGE. On GET (read from the driver), + * the array size of the hardware indirection table. + * @indir: The indirection table of size @indir_size entries. + * @key_size: On SET, the array size of the user buffer for the hash key, + * which may be zero. On GET (read from the driver), the size of the + * hardware hash key. + * @key: The hash key of size @key_size bytes. + * @rss_context: RSS context identifier. Context 0 is the default for normal + * traffic; other contexts can be referenced as the destination for RX flow + * classification rules. On SET, %ETH_RXFH_CONTEXT_ALLOC is used + * to allocate a new RSS context; on return this field will + * contain the ID of the newly allocated context. + * @rss_delete: Set to non-ZERO to remove the @rss_context context. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + */ +struct ethtool_rxfh_param { + u8 hfunc; + u32 indir_size; + u32 *indir; + u32 key_size; + u8 *key; + u32 rss_context; + u8 rss_delete; + u8 input_xfrm; +}; + +/** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. + * @cap_rss_ctx_supported: indicates if the driver supports RSS + * contexts. + * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor + * RSS. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -694,15 +745,6 @@ struct ethtool_mm_stats { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. - * @get_rxfh_context: Get the contents of the RX flow hash indirection table, - * hash key, and/or hash function assiciated to the given rss context. - * Returns a negative error code or zero. - * @set_rxfh_context: Create, remove and configure RSS contexts. Allows setting - * the contents of the RX flow hash indirection table, hash key, and/or - * hash function associated to the given context. Arguments which are set - * to %NULL or zero will remain unchanged. - * Returns a negative error code or zero. An error code must be returned - * if at least one unsupported change was requested. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -785,6 +827,8 @@ struct ethtool_mm_stats { */ struct ethtool_ops { u32 cap_link_lanes_supported:1; + u32 cap_rss_ctx_supported:1; + u32 cap_rss_sym_xor_supported:1; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -844,15 +888,9 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key, - u8 *hfunc); - int (*set_rxfh)(struct net_device *, const u32 *indir, - const u8 *key, const u8 hfunc); - int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key, - u8 *hfunc, u32 rss_context); - int (*set_rxfh_context)(struct net_device *, const u32 *indir, - const u8 *key, const u8 hfunc, - u32 *rss_context, bool delete); + int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); + int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); @@ -864,8 +902,8 @@ struct ethtool_ops { struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_eee)(struct net_device *, struct ethtool_eee *); - int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee); + int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee); int (*get_tunable)(struct net_device *, const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, @@ -1044,12 +1082,52 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, } /** + * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. + * @dev: pointer to net_device structure + * @info: buffer to hold the result + * Returns zero on success, non-zero otherwise. + */ +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); + +/** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); + +/** + * ethtool_puts - Write string to ethtool string data + * @data: Pointer to a pointer to the start of string to update + * @str: String to write + * + * Write string to *data without a trailing newline. Update *data + * to point at start of next string. + * + * Prefer this function to ethtool_sprintf() when given only + * two arguments or if @fmt is just "%s". + */ +extern void ethtool_puts(u8 **data, const char *str); + +/* Link mode to forced speed capabilities maps */ +struct ethtool_forced_speed_map { + u32 speed; + __ETHTOOL_DECLARE_LINK_MODE_MASK(caps); + + const u32 *cap_arr; + u32 arr_size; +}; + +#define ETHTOOL_FORCED_SPEED_MAP(prefix, value) \ +{ \ + .speed = SPEED_##value, \ + .cap_arr = prefix##_##value, \ + .arr_size = ARRAY_SIZE(prefix##_##value), \ +} + +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index b9d83652c097..e32bee4345fb 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -35,8 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); +void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); @@ -58,15 +57,8 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) return ERR_PTR(-ENOSYS); } -static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +static inline void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask) { - return -ENOSYS; -} - -static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, - unsigned mask) -{ - return -ENOSYS; } static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) @@ -92,5 +84,10 @@ static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) #endif +static inline void eventfd_signal(struct eventfd_ctx *ctx) +{ + eventfd_signal_mask(ctx, 0); +} + #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/evm.h b/include/linux/evm.h index 7dc1ee74169f..d48d6da32315 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -12,53 +12,15 @@ #include <linux/integrity.h> #include <linux/xattr.h> -struct integrity_iint_cache; - #ifdef CONFIG_EVM extern int evm_set_key(void *key, size_t keylen); extern enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr); -extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); -extern int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size); -extern void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len); -extern int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *xattr_name); -extern void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name); -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - evm_inode_post_removexattr(dentry, acl_name); -} -extern int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return evm_inode_set_acl(idmap, dentry, acl_name, NULL); -} -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); -} -extern int evm_inode_init_security(struct inode *inode, - const struct xattr *xattr_array, - struct xattr *evm); + size_t xattr_value_len); +int evm_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct xattr *xattrs, + int *xattr_count); extern bool evm_revalidate_status(const char *xattr_name); extern int evm_protected_xattr_if_enabled(const char *req_xattr_name); extern int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, @@ -83,83 +45,16 @@ static inline int evm_set_key(void *key, size_t keylen) static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint) + size_t xattr_value_len) { return INTEGRITY_UNKNOWN; } #endif -static inline int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr) -{ - return 0; -} - -static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) -{ - return; -} - -static inline int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size) -{ - return 0; -} - -static inline void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return; -} - -static inline int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return; -} - -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return; -} - -static inline int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - return 0; -} - -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} - -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return; -} - -static inline int evm_inode_init_security(struct inode *inode, - const struct xattr *xattr_array, - struct xattr *evm) +static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + struct xattr *xattrs, + int *xattr_count) { return 0; } diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 1c849db953a5..d445705ac13c 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -16,10 +16,13 @@ * and eliminates the need for absolute relocations that require runtime * processing on relocatable kernels. */ +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym "- ." #elif defined(CONFIG_64BIT) +#define __KSYM_ALIGN ".balign 8" #define __KSYM_REF(sym) ".quad " #sym #else +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym #endif @@ -42,7 +45,7 @@ " .asciz \"" ns "\"" "\n" \ " .previous" "\n" \ " .section \"___ksymtab" sec "+" #name "\", \"a\"" "\n" \ - " .balign 4" "\n" \ + __KSYM_ALIGN "\n" \ "__ksymtab_" #name ":" "\n" \ __KSYM_REF(sym) "\n" \ __KSYM_REF(__kstrtab_ ##name) "\n" \ @@ -50,8 +53,8 @@ " .previous" "\n" \ ) -#ifdef CONFIG_IA64 -#define KSYM_FUNC(name) @fptr(name) +#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name #endif @@ -61,6 +64,7 @@ #define SYMBOL_CRC(sym, crc, sec) \ asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ + ".balign 4" "\n" \ "__crc_" #sym ":" "\n" \ ".long " #crc "\n" \ ".previous" "\n") diff --git a/include/linux/export.h b/include/linux/export.h index beed8387e0a4..0bbd02fd351d 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -7,15 +7,6 @@ #include <linux/stringify.h> /* - * Export symbols from the kernel to modules. Forked from module.h - * to reduce the amount of pointless cruft we feed to gcc when only - * exporting a simple symbol or two. - * - * Try not to add #includes here. It slows compilation and makes kernel - * hackers place grumpy comments in header files. - */ - -/* * This comment block is used by fixdep. Please do not remove. * * When CONFIG_MODVERSIONS is changed from n to y, all source files having @@ -23,15 +14,6 @@ * side effect of the *.o build rule. */ -#ifndef __ASSEMBLY__ -#ifdef MODULE -extern struct module __this_module; -#define THIS_MODULE (&__this_module) -#else -#define THIS_MODULE ((struct module *)0) -#endif -#endif /* __ASSEMBLY__ */ - #ifdef CONFIG_64BIT #define __EXPORT_SYMBOL_REF(sym) \ .balign 8 ASM_NL \ @@ -50,7 +32,7 @@ extern struct module __this_module; __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous -#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS) +#if defined(__DISABLE_EXPORTS) /* * Allow symbol exports to be disabled completely so that C code may @@ -75,7 +57,7 @@ extern struct module __this_module; __ADDRESSABLE(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) -#endif /* CONFIG_MODULES */ +#endif #ifdef DEFAULT_SYMBOL_NAMESPACE #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 11fbd0ee1370..bb37ad5cc954 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -99,12 +99,29 @@ enum fid_type { FILEID_FAT_WITH_PARENT = 0x72, /* + * 64 bit inode number, 32 bit generation number. + */ + FILEID_INO64_GEN = 0x81, + + /* + * 64 bit inode number, 32 bit generation number, + * 64 bit parent inode number, 32 bit parent generation. + */ + FILEID_INO64_GEN_PARENT = 0x82, + + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) */ FILEID_LUSTRE = 0x97, /* + * 64 bit inode number, 32 bit subvolume, 32 bit generation number: + */ + FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, + FILEID_BCACHEFS_WITH_PARENT = 0xb2, + + /* * 64 bit unique kernfs id */ FILEID_KERNFS = 0xfe, @@ -123,7 +140,11 @@ struct fid { u32 parent_ino; u32 parent_gen; } i32; - struct { + struct { + u64 ino; + u32 gen; + } __packed i64; + struct { u32 block; u16 partref; u16 parent_partref; @@ -224,15 +245,56 @@ struct export_operations { atomic attribute updates */ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ +#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ unsigned long flags; }; +/** + * exportfs_lock_op_is_async() - export op supports async lock operation + * @export_ops: the nfs export operations to check + * + * Returns true if the nfs export_operations structure has + * EXPORT_OP_ASYNC_LOCK in their flags set + */ +static inline bool +exportfs_lock_op_is_async(const struct export_operations *export_ops) +{ + return export_ops->flags & EXPORT_OP_ASYNC_LOCK; +} + extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags); +static inline bool exportfs_can_encode_fid(const struct export_operations *nop) +{ + return !nop || nop->encode_fh; +} + +static inline bool exportfs_can_decode_fh(const struct export_operations *nop) +{ + return nop && nop->fh_to_dentry; +} + +static inline bool exportfs_can_encode_fh(const struct export_operations *nop, + int fh_flags) +{ + /* + * If a non-decodeable file handle was requested, we only need to make + * sure that filesystem did not opt-out of encoding fid. + */ + if (fh_flags & EXPORT_FH_FID) + return exportfs_can_encode_fid(nop); + + /* + * If a decodeable file handle was requested, we need to make sure that + * filesystem can also decode file handles. + */ + return exportfs_can_decode_fh(nop); +} + static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, int *max_len) { @@ -252,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Generic helpers for filesystems. */ -extern struct dentry *generic_fh_to_dentry(struct super_block *sb, +int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); +struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); -extern struct dentry *generic_fh_to_parent(struct super_block *sb, +struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)); diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 3c45c3846fe9..e596a0abcb27 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -328,16 +328,4 @@ struct extcon_specific_cable_nb { struct extcon_dev *edev; unsigned long previous_value; }; - -static inline int extcon_register_interest(struct extcon_specific_cable_nb *obj, - const char *extcon_name, const char *cable_name, - struct notifier_block *nb) -{ - return -EINVAL; -} - -static inline int extcon_unregister_interest(struct extcon_specific_cable_nb *obj) -{ - return -EINVAL; -} #endif /* __LINUX_EXTCON_H__ */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..a357287eac1e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -13,10 +13,10 @@ #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ -#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ +#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */ +#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ +#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ +#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) @@ -27,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -40,12 +41,6 @@ #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */ -#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ -#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) -#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1) - /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) @@ -81,6 +76,7 @@ enum stop_cp_reason { STOP_CP_REASON_CORRUPTED_SUMMARY, STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_MAX, }; @@ -104,6 +100,7 @@ enum f2fs_error { ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, ERROR_INVALID_NODE_REFERENCE, + ERROR_INCONSISTENT_NAT, ERROR_MAX, }; @@ -210,14 +207,14 @@ struct f2fs_checkpoint { unsigned char sit_nat_version_bitmap[]; } __packed; -#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ +#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */ #define CP_MIN_CHKSUM_OFFSET \ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap)) /* * For orphan inode management */ -#define F2FS_ORPHANS_PER_BLOCK 1020 +#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32)) #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) @@ -243,14 +240,31 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 /* 200 bytes for inline xattrs by default */ #define DEFAULT_INLINE_XATTR_ADDRS 50 -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ + +#define OFFSET_OF_END_OF_I_EXT 360 +#define SIZE_OF_I_NID 20 + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode number */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +/* Address Pointers in an Inode */ +#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \ + - SIZE_OF_I_NID \ + - sizeof(struct node_footer)) / sizeof(__le32)) #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) -#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +/* Address Pointers in a Direct Block */ +#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +/* Node IDs in an Indirect Block */ +#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_PAGE(page, inode) \ (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) @@ -342,14 +356,6 @@ enum { #define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode number */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - struct f2fs_node { /* can be one of three types: inode, direct, and indirect types */ union { @@ -363,7 +369,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -378,12 +384,13 @@ struct f2fs_nat_block { /* * For SIT entries * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. + * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size, + * this results in a segment size of 2MB. For 16k pages, the default segment size + * is 8MB. * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry)) /* * F2FS uses 4 bytes to represent block address. As a result, supported size of @@ -418,7 +425,7 @@ struct f2fs_sit_block { * For segment summary * * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. + * exactly one segment by default. Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -429,12 +436,12 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM 512 -#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8) +#define SUMMARY_SIZE (7) /* sizeof(struct f2fs_summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) -/* a summary entry for a 4KB-sized block in a segment */ +/* a summary entry for a block in a segment */ struct f2fs_summary { __le32 nid; /* parent node id */ union { @@ -518,7 +525,7 @@ struct f2fs_journal { }; } __packed; -/* 4KB-sized summary block structure */ +/* Block-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; @@ -559,11 +566,14 @@ typedef __le32 f2fs_hash_t; * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. */ -#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1)) #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ +#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) #define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */ @@ -576,7 +586,7 @@ struct f2fs_dir_entry { __u8 file_type; /* file type */ } __packed; -/* 4KB-sized directory entry block */ +/* Block-sized directory entry block */ struct f2fs_dentry_block { /* validity bitmap for directory entries in each block */ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; diff --git a/include/linux/fb.h b/include/linux/fb.h index ce7d588edc3e..0dd27364d56f 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -2,28 +2,30 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include <linux/refcount.h> -#include <linux/kgdb.h> #include <uapi/linux/fb.h> #define FBIO_CURSOR _IOWR('F', 0x08, struct fb_cursor_user) -#include <linux/fs.h> -#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/printk.h> +#include <linux/refcount.h> +#include <linux/types.h> #include <linux/workqueue.h> -#include <linux/notifier.h> -#include <linux/list.h> -#include <linux/backlight.h> -#include <linux/slab.h> #include <asm/fb.h> -struct vm_area_struct; -struct fb_info; +struct backlight_device; struct device; +struct device_node; +struct fb_info; struct file; +struct i2c_adapter; +struct inode; +struct module; +struct notifier_block; +struct page; struct videomode; -struct device_node; +struct vm_area_struct; /* Definitions below are used in the parsed monitor specs */ #define FB_DPMS_ACTIVE_OFF 1 @@ -143,9 +145,13 @@ struct fb_event { void *data; }; +/* Enough for the VT console needs, see its max_font_width/height */ +#define FB_MAX_BLIT_WIDTH 64 +#define FB_MAX_BLIT_HEIGHT 128 + struct fb_blit_caps { - u32 x; - u32 y; + DECLARE_BITMAP(x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(y, FB_MAX_BLIT_HEIGHT); u32 len; u32 flags; }; @@ -192,10 +198,12 @@ struct fb_pixmap { u32 scan_align; /* alignment per scanline */ u32 access_align; /* alignment per read/write (bits) */ u32 flags; /* see FB_PIXMAP_* */ - u32 blit_x; /* supported bit block dimensions (1-32)*/ - u32 blit_y; /* Format: blit_x = 1 << (width - 1) */ - /* blit_y = 1 << (height - 1) */ - /* if 0, will be set to 0xffffffff (all)*/ + /* supported bit block dimensions */ + /* Format: test_bit(width - 1, blit_x) */ + /* test_bit(height - 1, blit_y) */ + /* if zero, will be set to full (all) */ + DECLARE_BITMAP(blit_x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(blit_y, FB_MAX_BLIT_HEIGHT); /* access methods */ void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size); void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size); @@ -383,7 +391,6 @@ struct fb_tile_ops { #endif /* CONFIG_FB_TILEBLITTING */ /* FBINFO_* = fb_info.flags bit flags */ -#define FBINFO_DEFAULT 0 #define FBINFO_HWACCEL_DISABLED 0x0002 /* When FBINFO_HWACCEL_DISABLED is set: * Hardware acceleration is turned off. Software implementations @@ -481,7 +488,9 @@ struct fb_info { const struct fb_ops *fbops; struct device *device; /* This is the parent */ +#if defined(CONFIG_FB_DEVICE) struct device *dev; /* This is this fb device */ +#endif int class_flag; /* private sysfs flags */ #ifdef CONFIG_FB_TILEBLITTING struct fb_tile_ops *tileops; /* Tile Blitting */ @@ -502,8 +511,6 @@ struct fb_info { bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; -#define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT - /* This will go away * fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags * when it wants to turn the acceleration engine on. This is @@ -527,7 +534,7 @@ extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_blank(struct fb_info *info, int blank); /* - * Drawing operations where framebuffer is in I/O memory + * Helpers for framebuffers in I/O memory */ extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); @@ -537,30 +544,27 @@ extern ssize_t fb_io_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); +int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma); -/* - * Initializes struct fb_ops for framebuffers in I/O memory. - */ - -#define __FB_DEFAULT_IO_OPS_RDWR \ +#define __FB_DEFAULT_IOMEM_OPS_RDWR \ .fb_read = fb_io_read, \ .fb_write = fb_io_write -#define __FB_DEFAULT_IO_OPS_DRAW \ +#define __FB_DEFAULT_IOMEM_OPS_DRAW \ .fb_fillrect = cfb_fillrect, \ .fb_copyarea = cfb_copyarea, \ .fb_imageblit = cfb_imageblit -#define __FB_DEFAULT_IO_OPS_MMAP \ - .fb_mmap = NULL /* default implementation */ +#define __FB_DEFAULT_IOMEM_OPS_MMAP \ + .fb_mmap = fb_io_mmap -#define FB_DEFAULT_IO_OPS \ - __FB_DEFAULT_IO_OPS_RDWR, \ - __FB_DEFAULT_IO_OPS_DRAW, \ - __FB_DEFAULT_IO_OPS_MMAP +#define FB_DEFAULT_IOMEM_OPS \ + __FB_DEFAULT_IOMEM_OPS_RDWR, \ + __FB_DEFAULT_IOMEM_OPS_DRAW, \ + __FB_DEFAULT_IOMEM_OPS_MMAP /* - * Drawing operations where framebuffer is in system RAM + * Helpers for framebuffers in system memory */ extern void sys_fillrect(struct fb_info *info, const struct fb_fillrect *rect); @@ -571,32 +575,31 @@ extern ssize_t fb_sys_read(struct fb_info *info, char __user *buf, extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); +#define __FB_DEFAULT_SYSMEM_OPS_RDWR \ + .fb_read = fb_sys_read, \ + .fb_write = fb_sys_write + +#define __FB_DEFAULT_SYSMEM_OPS_DRAW \ + .fb_fillrect = sys_fillrect, \ + .fb_copyarea = sys_copyarea, \ + .fb_imageblit = sys_imageblit + /* - * Initializes struct fb_ops for framebuffers in system memory. + * Helpers for framebuffers in DMA-able memory */ -#define __FB_DEFAULT_SYS_OPS_RDWR \ +#define __FB_DEFAULT_DMAMEM_OPS_RDWR \ .fb_read = fb_sys_read, \ .fb_write = fb_sys_write -#define __FB_DEFAULT_SYS_OPS_DRAW \ +#define __FB_DEFAULT_DMAMEM_OPS_DRAW \ .fb_fillrect = sys_fillrect, \ .fb_copyarea = sys_copyarea, \ .fb_imageblit = sys_imageblit -#define __FB_DEFAULT_SYS_OPS_MMAP \ - .fb_mmap = NULL /* default implementation */ - -#define FB_DEFAULT_SYS_OPS \ - __FB_DEFAULT_SYS_OPS_RDWR, \ - __FB_DEFAULT_SYS_OPS_DRAW, \ - __FB_DEFAULT_SYS_OPS_MMAP - -/* drivers/video/fbmem.c */ +/* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); -extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); -extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); @@ -607,10 +610,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var, extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); -extern bool fb_center_logo; -extern int fb_logo_count; -extern struct class *fb_class; - static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -636,7 +635,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, } } -/* drivers/video/fb_defio.c */ +/* fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, @@ -687,11 +686,11 @@ extern int fb_deferred_io_fsync(struct file *file, loff_t start, __damage_area(info, image->dx, image->dy, image->width, image->height); \ } -#define FB_GEN_DEFAULT_DEFERRED_IO_OPS(__prefix, __damage_range, __damage_area) \ +#define FB_GEN_DEFAULT_DEFERRED_IOMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, io) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, cfb) -#define FB_GEN_DEFAULT_DEFERRED_SYS_OPS(__prefix, __damage_range, __damage_area) \ +#define FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) @@ -735,14 +734,11 @@ static inline bool fb_be_math(struct fb_info *info) #endif /* CONFIG_FB_FOREIGN_ENDIAN */ } -/* drivers/video/fbsysfs.c */ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); -extern int fb_init_device(struct fb_info *fb_info); -extern void fb_cleanup_device(struct fb_info *head); extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max); -/* drivers/video/fbmon.c */ +/* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 #define FB_HSYNCTIMINGS 2 @@ -776,7 +772,7 @@ extern int of_get_fb_videomode(struct device_node *np, extern int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode); -/* drivers/video/modedb.c */ +/* modedb.c */ #define VESA_MODEDB_SIZE 43 #define DMT_SIZE 0x50 @@ -802,7 +798,7 @@ extern void fb_videomode_to_modelist(const struct fb_videomode *modedb, int num, extern const struct fb_videomode *fb_find_best_display(const struct fb_monspecs *specs, struct list_head *head); -/* drivers/video/fbcmap.c */ +/* fbcmap.c */ extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp); extern int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags); extern void fb_dealloc_cmap(struct fb_cmap *cmap); @@ -852,16 +848,12 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, const struct fb_videomode *default_mode, unsigned int default_bpp); -#if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname); -#else -static inline bool fb_modesetting_disabled(const char *drvname) -{ - return false; -} -#endif -/* Convenience logging macros */ +/* + * Convenience logging macros + */ + #define fb_err(fb_info, fmt, ...) \ pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_notice(info, fmt, ...) \ @@ -873,4 +865,12 @@ static inline bool fb_modesetting_disabled(const char *drvname) #define fb_dbg(fb_info, fmt, ...) \ pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) +#define fb_warn_once(fb_info, fmt, ...) \ + pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) + +#define fb_WARN_ONCE(fb_info, condition, fmt, ...) \ + WARN_ONCE(condition, "fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) +#define fb_WARN_ON_ONCE(fb_info, x) \ + fb_WARN_ONCE(fb_info, (x), "%s", "fb_WARN_ON_ONCE(" __stringify(x) ")") + #endif /* _LINUX_FB_H */ diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index e066816f3519..78c8326d74ae 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -83,12 +83,17 @@ struct dentry; static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - - if (fd < fdt->max_fds) { - fd = array_index_nospec(fd, fdt->max_fds); - return rcu_dereference_raw(fdt->fd[fd]); - } - return NULL; + unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds); + struct file *needs_masking; + + /* + * 'mask' is zero for an out-of-bounds fd, all ones for ok. + * 'fd&mask' is 'fd' for ok, or 0 for out of bounds. + * + * Accessing fdt->fd[0] is ok, but needs masking of the result. + */ + needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]); + return (struct file *)(mask & (unsigned long)needs_masking); } static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) @@ -98,20 +103,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) -{ - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "suspicious rcu_dereference_check() usage"); - return files_lookup_fd_raw(files, fd); -} - -static inline struct file *lookup_fd_rcu(unsigned int fd) -{ - return files_lookup_fd_rcu(current->files, fd); -} - -struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); +struct file *lookup_fdget_rcu(unsigned int fd); +struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; @@ -125,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned, extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern struct file *close_fd_get_file(unsigned int fd); +extern struct file *file_close_fd(unsigned int fd); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); diff --git a/include/linux/file.h b/include/linux/file.h index 6e9099d29343..169692cb1906 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -24,6 +24,8 @@ struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); @@ -96,18 +98,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), extern void fd_install(unsigned int fd, struct file *file); -extern int __receive_fd(struct file *file, int __user *ufd, - unsigned int o_flags); - -extern int receive_fd(struct file *file, unsigned int o_flags); +int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); -static inline int receive_fd_user(struct file *file, int __user *ufd, - unsigned int o_flags) -{ - if (ufd == NULL) - return -EFAULT; - return __receive_fd(file, ufd, o_flags); -} int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); diff --git a/include/linux/filelock.h b/include/linux/filelock.h index efcdd1631d9b..daee999d05f3 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -27,6 +27,7 @@ #define FILE_LOCK_DEFERRED 1 struct file_lock; +struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); @@ -39,14 +40,17 @@ struct lock_manager_operations { void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); - bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock *, int, struct list_head *); - void (*lm_setup)(struct file_lock *, void **); - bool (*lm_breaker_owns_lease)(struct file_lock *); bool (*lm_lock_expirable)(struct file_lock *cfl); void (*lm_expire_lock)(void); }; +struct lease_manager_operations { + bool (*lm_break)(struct file_lease *); + int (*lm_change)(struct file_lease *, int, struct list_head *); + void (*lm_setup)(struct file_lease *, void **); + bool (*lm_breaker_owns_lease)(struct file_lease *); +}; + struct lock_manager { struct list_head list; /* @@ -85,31 +89,31 @@ bool opens_in_grace(struct net *); * * Obviously, the last two criteria only matter for POSIX locks. */ -struct file_lock { - struct file_lock *fl_blocker; /* The lock, that is blocking us */ - struct list_head fl_list; /* link into file_lock_context */ - struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_blocked_requests; /* list of requests with + +struct file_lock_core { + struct file_lock_core *flc_blocker; /* The lock that is blocking us */ + struct list_head flc_list; /* link into file_lock_context */ + struct hlist_node flc_link; /* node in global lists */ + struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ - struct list_head fl_blocked_member; /* node in + struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ - fl_owner_t fl_owner; - unsigned int fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - int fl_link_cpu; /* what cpu's list is this on? */ - wait_queue_head_t fl_wait; - struct file *fl_file; + fl_owner_t flc_owner; + unsigned int flc_flags; + unsigned char flc_type; + pid_t flc_pid; + int flc_link_cpu; /* what cpu's list is this on? */ + wait_queue_head_t flc_wait; + struct file *flc_file; +}; + +struct file_lock { + struct file_lock_core c; loff_t fl_start; loff_t fl_end; - struct fasync_struct * fl_fasync; /* for lease break notifications */ - /* for lease breaks: */ - unsigned long fl_break_time; - unsigned long fl_downgrade_time; - const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { @@ -126,6 +130,15 @@ struct file_lock { } fl_u; } __randomize_layout; +struct file_lease { + struct file_lock_core c; + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ + unsigned long fl_break_time; + unsigned long fl_downgrade_time; + const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ +} __randomize_layout; + struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; @@ -144,14 +157,34 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 *); #endif -int fcntl_setlease(unsigned int fd, struct file *filp, long arg); +int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return fl->c.flc_type == F_UNLCK; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return fl->c.flc_type == F_RDLCK; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return fl->c.flc_type == F_WRLCK; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ + wake_up(&fl->c.flc_wait); +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); -struct file_lock * locks_alloc_lock(void); +struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); @@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_l int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + +void locks_init_lease(struct file_lease *); +void locks_free_lease(struct file_lease *fl); +struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); -int generic_setlease(struct file *, long, struct file_lock **, void **priv); -int vfs_setlease(struct file *, long, struct file_lock **, void **); -int lease_modify(struct file_lock *, int, struct list_head *); +int generic_setlease(struct file *, int, struct file_lease **, void **priv); +int kernel_setlease(struct file *, int, struct file_lease **, void **); +int vfs_setlease(struct file *, int, struct file_lease **, void **); +int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); @@ -213,7 +251,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file, return -EACCES; } #endif -static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { return -EINVAL; } @@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return false; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ +} + static inline void locks_free_lock_context(struct inode *inode) { @@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl) return; } +static inline void locks_init_lease(struct file_lease *fl) +{ + return; +} + static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; @@ -306,19 +368,25 @@ static inline void lease_get_mtime(struct inode *inode, return; } -static inline int generic_setlease(struct file *filp, long arg, - struct file_lock **flp, void **priv) +static inline int generic_setlease(struct file *filp, int arg, + struct file_lease **flp, void **priv) +{ + return -EINVAL; +} + +static inline int kernel_setlease(struct file *filp, int arg, + struct file_lease **lease, void **priv) { return -EINVAL; } -static inline int vfs_setlease(struct file *filp, long arg, - struct file_lock **lease, void **priv) +static inline int vfs_setlease(struct file *filp, int arg, + struct file_lease **lease, void **priv) { return -EINVAL; } -static inline int lease_modify(struct file_lock *fl, int arg, +static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; @@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode) #endif /* !CONFIG_FILE_LOCKING */ +/* for walking lists of file_locks linked by fl_list */ +#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); diff --git a/include/linux/filter.h b/include/linux/filter.h index f69114083ec7..219ee7a76874 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -69,6 +69,12 @@ struct ctl_table_header; /* unused opcode to mark special load instruction. Same as BPF_ABS */ #define BPF_PROBE_MEM 0x20 +/* unused opcode to mark special ldsx instruction. Same as BPF_IND */ +#define BPF_PROBE_MEMSX 0x40 + +/* unused opcode to mark special load instruction. Same as BPF_MSH */ +#define BPF_PROBE_MEM32 0xa0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 @@ -90,39 +96,49 @@ struct ctl_table_header; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ -#define BPF_ALU64_REG(OP, DST, SRC) \ +#define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ - .off = 0, \ + .off = OFF, \ .imm = 0 }) -#define BPF_ALU32_REG(OP, DST, SRC) \ +#define BPF_ALU64_REG(OP, DST, SRC) \ + BPF_ALU64_REG_OFF(OP, DST, SRC, 0) + +#define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ - .off = 0, \ + .off = OFF, \ .imm = 0 }) +#define BPF_ALU32_REG(OP, DST, SRC) \ + BPF_ALU32_REG_OFF(OP, DST, SRC, 0) + /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -134,6 +150,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -170,6 +196,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -254,6 +298,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ @@ -496,24 +550,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 @@ -685,7 +742,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -765,23 +822,6 @@ DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp); -static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, - struct xdp_buff *xdp) -{ - /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus - * under local_bh_disable(), which provides the needed RCU protection - * for accessing map entries. - */ - u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); - - if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { - if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) - act = xdp_master_redirect(xdp); - } - - return act; -} - void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) @@ -920,6 +960,11 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +bool bpf_jit_supports_ptr_xchg(void); +bool bpf_jit_supports_arena(void); +u64 bpf_arch_uaddress_limit(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -989,12 +1034,6 @@ int xdp_do_redirect_frame(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush(void); -/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as - * it is no longer only flushing maps. Keep this define for compatibility - * until all drivers are updated - do not use xdp_do_flush_map() in new code! - */ -#define xdp_do_flush_map xdp_do_flush - void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET @@ -1037,7 +1076,7 @@ struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_prog_pack_free(struct bpf_binary_header *hdr); +void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { @@ -1109,7 +1148,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; @@ -1135,6 +1174,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1202,6 +1242,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) @@ -1293,6 +1338,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { @@ -1580,10 +1626,9 @@ static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) return NULL; } -static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, - unsigned long len, bool flush) +static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, + unsigned long len, bool flush) { - return NULL; } #endif /* CONFIG_NET */ diff --git a/include/linux/find.h b/include/linux/find.h index 5e4f39ef2e72..c69598e383c1 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -405,7 +405,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, { unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_and_bit(addr1, addr2, offset); @@ -413,8 +413,8 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, } /** - * find_next_bit_wrap - find the next set bit in both memory regions - * @addr: The first address to base the search on + * find_next_bit_wrap - find the next set bit in a memory region + * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * @@ -427,7 +427,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, { unsigned long bit = find_next_bit(addr, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_bit(addr, offset); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index bd3fc75d4f14..dd9f2d765e68 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -75,7 +75,7 @@ void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); int fw_csr_string(const u32 *directory, int key, char *buf, size_t size); -extern struct bus_type fw_bus_type; +extern const struct bus_type fw_bus_type; struct fw_card_driver; struct fw_node; diff --git a/include/linux/firmware.h b/include/linux/firmware.h index de7fea3bca51..f026f8926d79 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/cleanup.h> #include <linux/gfp.h> #define FW_ACTION_NOUEVENT 0 @@ -27,6 +28,7 @@ struct firmware { * @FW_UPLOAD_ERR_INVALID_SIZE: invalid firmware image size * @FW_UPLOAD_ERR_RW_ERROR: read or write to HW failed, see kernel log * @FW_UPLOAD_ERR_WEAROUT: FLASH device is approaching wear-out, wait & retry + * @FW_UPLOAD_ERR_FW_INVALID: invalid firmware file * @FW_UPLOAD_ERR_MAX: Maximum error code marker */ enum fw_upload_err { @@ -38,6 +40,7 @@ enum fw_upload_err { FW_UPLOAD_ERR_INVALID_SIZE, FW_UPLOAD_ERR_RW_ERROR, FW_UPLOAD_ERR_WEAROUT, + FW_UPLOAD_ERR_FW_INVALID, FW_UPLOAD_ERR_MAX }; @@ -196,4 +199,6 @@ static inline void firmware_upload_unregister(struct fw_upload *fw_upload) int firmware_request_cache(struct device *device, const char *name); +DEFINE_FREE(firmware, struct firmware *, release_firmware(_T)) + #endif diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 29cd11d5a3cf..23384a54d575 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -123,7 +123,6 @@ struct cs_dsp_client_ops; * @sysclk_mask: Mask of frequency bits within sysclk register (ADSP1 only) * @sysclk_shift: Shift of frequency bits within sysclk register (ADSP1 only) * @alg_regions: List of currently loaded algorithm regions - * @fw_file_name: Filename of the current firmware * @fw_name: Name of the current firmware * @fw_id: ID of the current firmware, obtained from the wmfw * @fw_id_version: Version of the firmware, obtained from the wmfw diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h index 4f7895a3b73c..1f176a2683fe 100644 --- a/include/linux/firmware/imx/dsp.h +++ b/include/linux/firmware/imx/dsp.h @@ -37,17 +37,11 @@ struct imx_dsp_ipc { static inline void imx_dsp_set_data(struct imx_dsp_ipc *ipc, void *data) { - if (!ipc) - return; - ipc->private_data = data; } static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc) { - if (!ipc) - return NULL; - return ipc->private_data; } diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 5cc63fe7e84d..df17196df5ff 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -21,31 +21,37 @@ int imx_scu_enable_general_irq_channel(struct device *dev); int imx_scu_irq_register_notifier(struct notifier_block *nb); int imx_scu_irq_unregister_notifier(struct notifier_block *nb); int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable); +int imx_scu_irq_get_status(u8 group, u32 *irq_status); int imx_scu_soc_init(struct device *dev); #else static inline int imx_scu_soc_init(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_enable_general_irq_channel(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_register_notifier(struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_unregister_notifier(struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable) { - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static inline int imx_scu_irq_get_status(u8 group, u32 *irq_status) +{ + return -EOPNOTSUPP; } #endif #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index a718f853d457..ee80ca4bb0d0 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -467,6 +467,31 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION) /** + * SMC call protocol for Mailbox, starting FUNCID from 60 + * + * Call register usage: + * a0 INTEL_SIP_SMC_MBOX_SEND_CMD + * a1 mailbox command code + * a2 physical address that contain mailbox command data (not include header) + * a3 mailbox command data size in word + * a4 set to 0 for CASUAL, set to 1 for URGENT + * a5 physical address for secure firmware to put response data + * (not include header) + * a6 maximum size in word of physical address to store response data + * a7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_REJECTED or + * INTEL_SIP_SMC_STATUS_ERROR + * a1 mailbox error code + * a2 response data length in word + * a3 not used + */ +#define INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD 60 + #define INTEL_SIP_SMC_MBOX_SEND_CMD \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD) + +/** * Request INTEL_SIP_SMC_SVC_VERSION * * Sync call used to query the SIP SMC API Version diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 0c16037fd08d..60ed82112680 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -118,6 +118,9 @@ struct stratix10_svc_chan; * @COMMAND_SMC_SVC_VERSION: Non-mailbox SMC SVC API Version, * return status is SVC_STATUS_OK * + * @COMMAND_MBOX_SEND_CMD: send generic mailbox command, return status is + * SVC_STATUS_OK or SVC_STATUS_ERROR + * * @COMMAND_RSU_DCMF_STATUS: query firmware for the DCMF status * return status is SVC_STATUS_OK or SVC_STATUS_ERROR * @@ -164,6 +167,8 @@ enum stratix10_svc_command_code { COMMAND_FCS_RANDOM_NUMBER_GEN, /* for general status poll */ COMMAND_POLL_SERVICE_STATUS = 40, + /* for generic mailbox send command */ + COMMAND_MBOX_SEND_CMD = 100, /* Non-mailbox SMC Call */ COMMAND_SMC_SVC_VERSION = 200, }; diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h index 28fd313340b8..5b1d16fa3f56 100644 --- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h +++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h @@ -46,17 +46,11 @@ struct mtk_adsp_ipc { static inline void mtk_adsp_ipc_set_data(struct mtk_adsp_ipc *ipc, void *data) { - if (!ipc) - return; - ipc->private_data = data; } static inline void *mtk_adsp_ipc_get_data(struct mtk_adsp_ipc *ipc) { - if (!ipc) - return NULL; - return ipc->private_data; } diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 95b0da2326a9..8eaf8922ab02 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -19,7 +19,7 @@ enum { struct meson_sm_firmware; int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h new file mode 100644 index 000000000000..366243ee9609 --- /dev/null +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. + * + * Copyright (C) 2023 Maximilian Luz <luzmaximilian@gmail.com> + */ + +#ifndef __QCOM_QSEECOM_H +#define __QCOM_QSEECOM_H + +#include <linux/auxiliary_bus.h> +#include <linux/dma-mapping.h> +#include <linux/types.h> + +#include <linux/firmware/qcom/qcom_scm.h> + +/** + * struct qseecom_client - QSEECOM client device. + * @aux_dev: Underlying auxiliary device. + * @app_id: ID of the loaded application. + */ +struct qseecom_client { + struct auxiliary_device aux_dev; + u32 app_id; +}; + +/** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. + */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * dma_free_coherent() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. + */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + +/** + * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @client: The QSEECOM client associated with the target app. + * @req: DMA address of the request buffer sent to the app. + * @req_size: Size of the request buffer. + * @rsp: DMA address of the response buffer, written to by the app. + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given client and read + * back its response. The caller must provide two DMA memory regions, one for + * the request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send(). + * Clients should prefer to use this wrapper. + * + * Return: Zero on success, nonzero on failure. + */ +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) +{ + return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); +} + +#endif /* __QCOM_QSEECOM_H */ diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 250ea4efb7cb..aaa19f93ac43 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -extern bool qcom_scm_is_available(void); +bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); +int qcom_scm_set_cold_boot_addr(void *entry); +int qcom_scm_set_warm_boot_addr(void *entry); +void qcom_scm_cpu_power_down(u32 flags); +int qcom_scm_set_remote_state(u32 state, u32 id); struct qcom_scm_pas_metadata { void *ptr; @@ -72,54 +72,69 @@ struct qcom_scm_pas_metadata { ssize_t size; }; -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); +int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + struct qcom_scm_pas_metadata *ctx); void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - u64 *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); +int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 peripheral); +int qcom_scm_pas_shutdown(u32 peripheral); +bool qcom_scm_pas_supported(u32 peripheral); + +int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +bool qcom_scm_restore_sec_cfg_available(void); +int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, u32 cp_nonpixel_size); +int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +bool qcom_scm_ocmem_lock_available(void); +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode); +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size); + +bool qcom_scm_ice_available(void); +int qcom_scm_ice_invalidate_key(u32 index); +int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, u32 data_unit_size); + +bool qcom_scm_hdcp_available(void); +int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); + +int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +int qcom_scm_lmh_profile_change(u32 profile_id); +bool qcom_scm_lmh_dcvsh_available(void); + +#ifdef CONFIG_QCOM_QSEECOM + +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); + +#else /* CONFIG_QCOM_QSEECOM */ + +static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + return -EINVAL; +} + +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) +{ + return -EINVAL; +} + +#endif /* CONFIG_QCOM_QSEECOM */ #endif diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 9dda7d9898ff..1a069a56c961 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,6 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx + * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. * * Michal Simek <michal.simek@amd.com> * Davorin Mista <davorin.mista@aggios.com> @@ -32,8 +33,25 @@ #define PM_SIP_SVC 0xC2000000 /* PM API versions */ +#define PM_API_VERSION_1 1 #define PM_API_VERSION_2 2 +#define PM_PINCTRL_PARAM_SET_VERSION 2 + +#define ZYNQMP_FAMILY_CODE 0x23 +#define VERSAL_FAMILY_CODE 0x26 + +/* When all subfamily of platform need to support */ +#define ALL_SUB_FAMILY_CODE 0x00 +#define VERSAL_SUB_FAMILY_CODE 0x01 +#define VERSALNET_SUB_FAMILY_CODE 0x03 + +#define FAMILY_CODE_MASK GENMASK(27, 21) +#define SUB_FAMILY_CODE_MASK GENMASK(20, 19) + +#define API_ID_MASK GENMASK(7, 0) +#define MODULE_ID_MASK GENMASK(11, 8) + /* ATF only commands */ #define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 @@ -78,15 +96,41 @@ /* * Node IDs for the Error Events. */ -#define EVENT_ERROR_PMC_ERR1 (0x28100000U) -#define EVENT_ERROR_PMC_ERR2 (0x28104000U) -#define EVENT_ERROR_PSM_ERR1 (0x28108000U) -#define EVENT_ERROR_PSM_ERR2 (0x2810C000U) +#define VERSAL_EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define VERSAL_EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define VERSAL_EVENT_ERROR_PSM_ERR1 (0x28108000U) +#define VERSAL_EVENT_ERROR_PSM_ERR2 (0x2810C000U) + +#define VERSAL_NET_EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define VERSAL_NET_EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define VERSAL_NET_EVENT_ERROR_PMC_ERR3 (0x28108000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR1 (0x2810C000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR2 (0x28110000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR3 (0x28114000U) +#define VERSAL_NET_EVENT_ERROR_PSM_ERR4 (0x28118000U) /* ZynqMP SD tap delay tuning */ #define SD_ITAPDLY 0xFF180314 #define SD_OTAPDLYSEL 0xFF180318 +/** + * XPM_EVENT_ERROR_MASK_DDRMC_CR: Error event mask for DDRMC MC Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_CR BIT(18) + +/** + * XPM_EVENT_ERROR_MASK_DDRMC_NCR: Error event mask for DDRMC MC Non-Correctable ECC Error. + */ +#define XPM_EVENT_ERROR_MASK_DDRMC_NCR BIT(19) +#define XPM_EVENT_ERROR_MASK_NOC_NCR BIT(13) +#define XPM_EVENT_ERROR_MASK_NOC_CR BIT(12) + +enum pm_module_id { + PM_MODULE_ID = 0x0, + XSEM_MODULE_ID = 0x3, + TF_A_MODULE_ID = 0xa, +}; + enum pm_api_cb_id { PM_INIT_SUSPEND_CB = 30, PM_ACKNOWLEDGE_CB = 31, @@ -94,6 +138,7 @@ enum pm_api_cb_id { }; enum pm_api_id { + PM_API_FEATURES = 0, PM_GET_API_VERSION = 1, PM_REGISTER_NOTIFIER = 5, PM_FORCE_POWERDOWN = 8, @@ -113,7 +158,6 @@ enum pm_api_id { PM_SECURE_SHA = 26, PM_PINCTRL_REQUEST = 28, PM_PINCTRL_RELEASE = 29, - PM_PINCTRL_GET_FUNCTION = 30, PM_PINCTRL_SET_FUNCTION = 31, PM_PINCTRL_CONFIG_PARAM_GET = 32, PM_PINCTRL_CONFIG_PARAM_SET = 33, @@ -124,19 +168,20 @@ enum pm_api_id { PM_CLOCK_GETSTATE = 38, PM_CLOCK_SETDIVIDER = 39, PM_CLOCK_GETDIVIDER = 40, - PM_CLOCK_SETRATE = 41, - PM_CLOCK_GETRATE = 42, PM_CLOCK_SETPARENT = 43, PM_CLOCK_GETPARENT = 44, PM_FPGA_READ = 46, PM_SECURE_AES = 47, + PM_EFUSE_ACCESS = 53, PM_FEATURE_CHECK = 63, }; /* PMU-FW return status codes */ enum pm_ret_status { XST_PM_SUCCESS = 0, + XST_PM_INVALID_VERSION = 4, XST_PM_NO_FEATURE = 19, + XST_PM_INVALID_CRC = 301, XST_PM_INTERNAL = 2000, XST_PM_CONFLICT = 2001, XST_PM_NO_ACCESS = 2002, @@ -484,20 +529,18 @@ struct zynqmp_pm_query_data { u32 arg3; }; -int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, - u32 arg2, u32 arg3, u32 *ret_payload); +int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); int zynqmp_pm_get_chipid(u32 *idcode, u32 *version); +int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily); int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out); int zynqmp_pm_clock_enable(u32 clock_id); int zynqmp_pm_clock_disable(u32 clock_id); int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state); int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider); int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider); -int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate); -int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate); int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id); int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id); int zynqmp_pm_set_pll_frac_mode(u32 clk_id, u32 mode); @@ -521,6 +564,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_efuse_access(const u64 address, u32 *out); int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); @@ -534,7 +578,6 @@ int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype); int zynqmp_pm_set_boot_health_status(u32 value); int zynqmp_pm_pinctrl_request(const u32 pin); int zynqmp_pm_pinctrl_release(const u32 pin); -int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id); int zynqmp_pm_pinctrl_set_function(const u32 pin, const u32 id); int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, u32 *value); @@ -571,6 +614,11 @@ static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) return -ENODEV; } +static inline int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily) +{ + return -ENODEV; +} + static inline int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out) { @@ -602,16 +650,6 @@ static inline int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider) return -ENODEV; } -static inline int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate) -{ - return -ENODEV; -} - -static inline int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate) -{ - return -ENODEV; -} - static inline int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id) { return -ENODEV; @@ -714,6 +752,11 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) return -ENODEV; } +static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out) +{ + return -ENODEV; +} + static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) { @@ -781,11 +824,6 @@ static inline int zynqmp_pm_pinctrl_release(const u32 pin) return -ENODEV; } -static inline int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id) -{ - return -ENODEV; -} - static inline int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id) { return -ENODEV; diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 3e378b1fb0bc..e9a72fd0bfe7 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -39,38 +39,6 @@ void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); /* - * ---- SINGLE ---- - */ -struct fprop_local_single { - /* the local events counter */ - unsigned long events; - /* Period in which we last updated events */ - unsigned int period; - raw_spinlock_t lock; /* Protect period and numerator */ -}; - -#define INIT_FPROP_LOCAL_SINGLE(name) \ -{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -} - -int fprop_local_init_single(struct fprop_local_single *pl); -void fprop_local_destroy_single(struct fprop_local_single *pl); -void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl); -void fprop_fraction_single(struct fprop_global *p, - struct fprop_local_single *pl, unsigned long *numerator, - unsigned long *denominator); - -static inline -void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __fprop_inc_single(p, pl); - local_irq_restore(flags); -} - -/* * ---- PERCPU ---- */ struct fprop_local_percpu { diff --git a/include/linux/font.h b/include/linux/font.h index abf1442ce719..81caffd51bb4 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -57,7 +57,8 @@ extern const struct font_desc *find_font(const char *name); /* Get the default font for a specific screen size */ extern const struct font_desc *get_default_font(int xres, int yres, - u32 font_w, u32 font_h); + unsigned long *font_w, + unsigned long *font_h); /* Max. length for the name of a predefined font */ #define MAX_FONT_NAME 32 diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index da51a83b2829..6aeebe0a6777 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/const.h> #include <linux/limits.h> @@ -9,7 +10,46 @@ #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) -void fortify_panic(const char *name) __noreturn __cold; +#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) +#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) +#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ + FIELD_PREP(GENMASK(7, 1), func)) + +#ifndef fortify_panic +# define fortify_panic(func, write, avail, size, retfail) \ + __fortify_panic(FORTIFY_REASON(func, write), avail, size) +#endif + +#define FORTIFY_READ 0 +#define FORTIFY_WRITE 1 + +#define EACH_FORTIFY_FUNC(macro) \ + macro(strncpy), \ + macro(strnlen), \ + macro(strlen), \ + macro(strscpy), \ + macro(strlcat), \ + macro(strcat), \ + macro(strncat), \ + macro(memset), \ + macro(memcpy), \ + macro(memmove), \ + macro(memscan), \ + macro(memcmp), \ + macro(memchr), \ + macro(memchr_inv), \ + macro(kmemdup), \ + macro(strcpy), \ + macro(UNKNOWN), + +#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func + +enum fortify_func { + EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) +}; + +void __fortify_report(const u8 reason, const size_t avail, const size_t size); +void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); @@ -93,13 +133,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) -#define __struct_size(p) __builtin_dynamic_object_size(p, 0) -#define __member_size(p) __builtin_dynamic_object_size(p, 1) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) -#define __struct_size(p) __builtin_object_size(p, 0) -#define __member_size(p) __builtin_object_size(p, 1) #endif #define __compiletime_lessthan(bounds, length) ( \ @@ -147,7 +183,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } @@ -178,7 +214,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } @@ -214,82 +250,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } -/* Defined after fortified strlen() to reuse it. */ -extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); -/** - * strlcpy - Copy a string into another string buffer - * - * @p: pointer to destination of copy - * @q: pointer to NUL-terminated source string to copy - * @size: maximum number of bytes to write at @p - * - * If strlen(@q) >= @size, the copy of @q will be truncated at - * @size - 1 bytes. @p will always be NUL-terminated. - * - * Do not use this function. While FORTIFY_SOURCE tries to avoid - * over-reads when calculating strlen(@q), it is still possible. - * Prefer strscpy(), though note its different return values for - * detecting truncation. - * - * Returns total number of bytes written to @p, including terminating NUL. - * - */ -__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) -{ - const size_t p_size = __member_size(p); - const size_t q_size = __member_size(q); - size_t q_len; /* Full count of source string length. */ - size_t len; /* Count of characters going into destination. */ - - if (p_size == SIZE_MAX && q_size == SIZE_MAX) - return __real_strlcpy(p, q, size); - q_len = strlen(q); - len = (q_len >= size) ? size - 1 : q_len; - if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) { - /* Write size is always larger than destination. */ - if (len >= p_size) - __write_overflow(); - } - if (size) { - if (len >= p_size) - fortify_panic(__func__); - __underlying_memcpy(p, q, len); - p[len] = '\0'; - } - return q_len; -} - /* Defined after fortified strnlen() to reuse it. */ -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -/** - * strscpy - Copy a C-string into a sized buffer - * - * @p: Where to copy the string to - * @q: Where to copy the string from - * @size: Size of destination buffer - * - * Copy the source string @q, or as much of it as fits, into the destination - * @p buffer. The behavior is undefined if the string buffers overlap. The - * destination @p buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strlcpy() since the API doesn't require reading memory - * from the source @q string beyond the specified @size bytes, and since - * the return value is easier to error-check than strlcpy()'s. - * In addition, the implementation is robust to the string changing out - * from underneath it, unlike the current strlcpy() implementation. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zero padded. If padding is desired please use strscpy_pad(). - * - * Returns the number of characters copied in @p (not including the - * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated. - */ -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. */ const size_t p_size = __member_size(p); @@ -333,8 +300,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s * Generate a runtime write overflow error if len is greater than * p_size. */ - if (len > p_size) - fortify_panic(__func__); + if (p_size < len) + fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: @@ -392,7 +359,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if string is already overflowed. */ if (p_size <= p_len) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; @@ -401,7 +368,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if copy will overflow. */ if (p_size <= actual) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; @@ -428,9 +395,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); + const size_t wanted = strlcat(p, q, p_size); - if (strlcat(p, q, p_size) >= p_size) - fortify_panic(__func__); + if (p_size <= wanted) + fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } @@ -459,20 +427,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); - size_t p_len, copy_len; + size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); - if (p_size < p_len + copy_len + 1) - fortify_panic(__func__); + total = p_len + copy_len + 1; + if (p_size < total) + fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } -__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { @@ -507,7 +476,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) - fortify_panic("memset"); + fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); + return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ @@ -561,7 +531,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, - const char *func) + const u8 func) { if (__builtin_constant_p(size)) { /* @@ -605,9 +575,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != SIZE_MAX && p_size < size) || - (q_size != SIZE_MAX && q_size < size)) - fortify_panic(func); + if (p_size != SIZE_MAX && p_size < size) + fortify_panic(func, FORTIFY_WRITE, p_size, size, true); + else if (q_size != SIZE_MAX && q_size < size) + fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. @@ -640,10 +611,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t __q_size_field = (q_size_field); \ WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ - __q_size_field, #op), \ + __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ - "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) @@ -707,7 +678,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } @@ -723,8 +694,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } - if (p_size < size || q_size < size) - fortify_panic(__func__); + if (p_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); + else if (q_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } @@ -736,7 +709,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } @@ -748,7 +721,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } @@ -761,7 +734,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL); return __real_kmemdup(p, size, gfp); } @@ -798,7 +771,7 @@ char *strcpy(char * const POS p, const char * const POS q) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p); __underlying_memcpy(p, q, size); return p; } diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h new file mode 100644 index 000000000000..9724d4b44b9c --- /dev/null +++ b/include/linux/framer/framer-provider.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Generic framer profider header file + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ + +#ifndef __DRIVERS_PROVIDER_FRAMER_H +#define __DRIVERS_PROVIDER_FRAMER_H + +#include <linux/export.h> +#include <linux/framer/framer.h> +#include <linux/types.h> + +#define FRAMER_FLAG_POLL_STATUS BIT(0) + +/** + * struct framer_ops - set of function pointers for performing framer operations + * @init: operation to be performed for initializing the framer + * @exit: operation to be performed while exiting + * @power_on: powering on the framer + * @power_off: powering off the framer + * @flags: OR-ed flags (FRAMER_FLAG_*) to ask for core functionality + * - @FRAMER_FLAG_POLL_STATUS: + * Ask the core to perform a polling to get the framer status and + * notify consumers on change. + * The framer should call @framer_notify_status_change() when it + * detects a status change. This is usually done using interrupts. + * If the framer cannot detect this change, it can ask the core for + * a status polling. The core will call @get_status() periodically + * and, on change detected, it will notify the consumer. + * the @get_status() + * @owner: the module owner containing the ops + */ +struct framer_ops { + int (*init)(struct framer *framer); + void (*exit)(struct framer *framer); + int (*power_on)(struct framer *framer); + int (*power_off)(struct framer *framer); + + /** + * @get_status: + * + * Optional. + * + * Used to get the framer status. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*get_status)(struct framer *framer, struct framer_status *status); + + /** + * @set_config: + * + * Optional. + * + * Used to set the framer configuration. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*set_config)(struct framer *framer, const struct framer_config *config); + + /** + * @get_config: + * + * Optional. + * + * Used to get the framer configuration. framer_init() must have + * been called on the framer. + * + * Returns: 0 if successful, an negative error code otherwise + */ + int (*get_config)(struct framer *framer, struct framer_config *config); + + u32 flags; + struct module *owner; +}; + +/** + * struct framer_provider - represents the framer provider + * @dev: framer provider device + * @owner: the module owner having of_xlate + * @list: to maintain a linked list of framer providers + * @of_xlate: function pointer to obtain framer instance from framer pointer + */ +struct framer_provider { + struct device *dev; + struct module *owner; + struct list_head list; + struct framer * (*of_xlate)(struct device *dev, + const struct of_phandle_args *args); +}; + +static inline void framer_set_drvdata(struct framer *framer, void *data) +{ + dev_set_drvdata(&framer->dev, data); +} + +static inline void *framer_get_drvdata(struct framer *framer) +{ + return dev_get_drvdata(&framer->dev); +} + +#if IS_ENABLED(CONFIG_GENERIC_FRAMER) + +/* Create and destroy a framer */ +struct framer *framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops); +void framer_destroy(struct framer *framer); + +/* devm version */ +struct framer *devm_framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops); + +struct framer *framer_provider_simple_of_xlate(struct device *dev, + const struct of_phandle_args *args); + +struct framer_provider * +__framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)); + +void framer_provider_of_unregister(struct framer_provider *framer_provider); + +struct framer_provider * +__devm_framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)); + +void framer_notify_status_change(struct framer *framer); + +#else /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */ + +static inline struct framer *framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void framer_destroy(struct framer *framer) +{ +} + +/* devm version */ +static inline struct framer *devm_framer_create(struct device *dev, struct device_node *node, + const struct framer_ops *ops) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, + const struct of_phandle_args *args) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer_provider * +__framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)) +{ + return ERR_PTR(-ENOSYS); +} + +void framer_provider_of_unregister(struct framer_provider *framer_provider) +{ +} + +static inline struct framer_provider * +__devm_framer_provider_of_register(struct device *dev, struct module *owner, + struct framer *(*of_xlate)(struct device *dev, + const struct of_phandle_args *args)) +{ + return ERR_PTR(-ENOSYS); +} + +void framer_notify_status_change(struct framer *framer) +{ +} + +#endif /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */ + +#define framer_provider_of_register(dev, xlate) \ + __framer_provider_of_register((dev), THIS_MODULE, (xlate)) + +#define devm_framer_provider_of_register(dev, xlate) \ + __devm_framer_provider_of_register((dev), THIS_MODULE, (xlate)) + +#endif /* __DRIVERS_PROVIDER_FRAMER_H */ diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h new file mode 100644 index 000000000000..2b85fe9e7f9a --- /dev/null +++ b/include/linux/framer/framer.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Generic framer header file + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ + +#ifndef __DRIVERS_FRAMER_H +#define __DRIVERS_FRAMER_H + +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/device.h> +#include <linux/workqueue.h> + +/** + * enum framer_iface - Framer interface + * @FRAMER_IFACE_E1: E1 interface + * @FRAMER_IFACE_T1: T1 interface + */ +enum framer_iface { + FRAMER_IFACE_E1, + FRAMER_IFACE_T1, +}; + +/** + * enum framer_clock_type - Framer clock type + * @FRAMER_CLOCK_EXT: External clock + * @FRAMER_CLOCK_INT: Internal clock + */ +enum framer_clock_type { + FRAMER_CLOCK_EXT, + FRAMER_CLOCK_INT, +}; + +/** + * struct framer_config - Framer configuration + * @iface: Framer line interface + * @clock_type: Framer clock type + * @line_clock_rate: Framer line clock rate + */ +struct framer_config { + enum framer_iface iface; + enum framer_clock_type clock_type; + unsigned long line_clock_rate; +}; + +/** + * struct framer_status - Framer status + * @link_is_on: Framer link state. true, the link is on, false, the link is off. + */ +struct framer_status { + bool link_is_on; +}; + +/** + * enum framer_event - Event available for notification + * @FRAMER_EVENT_STATUS: Event notified on framer_status changes + */ +enum framer_event { + FRAMER_EVENT_STATUS, +}; + +/** + * struct framer - represents the framer device + * @dev: framer device + * @id: id of the framer device + * @ops: function pointers for performing framer operations + * @mutex: mutex to protect framer_ops + * @init_count: used to protect when the framer is used by multiple consumers + * @power_count: used to protect when the framer is used by multiple consumers + * @pwr: power regulator associated with the framer + * @notify_status_work: work structure used for status notifications + * @notifier_list: notifier list used for notifications + * @polling_work: delayed work structure used for the polling task + * @prev_status: previous read status used by the polling task to detect changes + */ +struct framer { + struct device dev; + int id; + const struct framer_ops *ops; + struct mutex mutex; /* Protect framer */ + int init_count; + int power_count; + struct regulator *pwr; + struct work_struct notify_status_work; + struct blocking_notifier_head notifier_list; + struct delayed_work polling_work; + struct framer_status prev_status; +}; + +#if IS_ENABLED(CONFIG_GENERIC_FRAMER) +int framer_pm_runtime_get(struct framer *framer); +int framer_pm_runtime_get_sync(struct framer *framer); +int framer_pm_runtime_put(struct framer *framer); +int framer_pm_runtime_put_sync(struct framer *framer); +int framer_init(struct framer *framer); +int framer_exit(struct framer *framer); +int framer_power_on(struct framer *framer); +int framer_power_off(struct framer *framer); +int framer_get_status(struct framer *framer, struct framer_status *status); +int framer_get_config(struct framer *framer, struct framer_config *config); +int framer_set_config(struct framer *framer, const struct framer_config *config); +int framer_notifier_register(struct framer *framer, struct notifier_block *nb); +int framer_notifier_unregister(struct framer *framer, struct notifier_block *nb); + +struct framer *framer_get(struct device *dev, const char *con_id); +void framer_put(struct device *dev, struct framer *framer); + +struct framer *devm_framer_get(struct device *dev, const char *con_id); +struct framer *devm_framer_optional_get(struct device *dev, const char *con_id); +#else +static inline int framer_pm_runtime_get(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_get_sync(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_put(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_pm_runtime_put_sync(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_init(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_exit(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_power_on(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_power_off(struct framer *framer) +{ + return -ENOSYS; +} + +static inline int framer_get_status(struct framer *framer, struct framer_status *status) +{ + return -ENOSYS; +} + +static inline int framer_get_config(struct framer *framer, struct framer_config *config) +{ + return -ENOSYS; +} + +static inline int framer_set_config(struct framer *framer, const struct framer_config *config) +{ + return -ENOSYS; +} + +static inline int framer_notifier_register(struct framer *framer, + struct notifier_block *nb) +{ + return -ENOSYS; +} + +static inline int framer_notifier_unregister(struct framer *framer, + struct notifier_block *nb) +{ + return -ENOSYS; +} + +static inline struct framer *framer_get(struct device *dev, const char *con_id) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void framer_put(struct device *dev, struct framer *framer) +{ +} + +static inline struct framer *devm_framer_get(struct device *dev, const char *con_id) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct framer *devm_framer_optional_get(struct device *dev, const char *con_id) +{ + return NULL; +} + +#endif + +#endif /* __DRIVERS_FRAMER_H */ diff --git a/include/linux/framer/pef2256.h b/include/linux/framer/pef2256.h new file mode 100644 index 000000000000..71d80af58c40 --- /dev/null +++ b/include/linux/framer/pef2256.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PEF2256 consumer API + * + * Copyright 2023 CS GROUP France + * + * Author: Herve Codina <herve.codina@bootlin.com> + */ +#ifndef __PEF2256_H__ +#define __PEF2256_H__ + +#include <linux/types.h> + +struct pef2256; +struct regmap; + +/* Retrieve the PEF2256 regmap */ +struct regmap *pef2256_get_regmap(struct pef2256 *pef2256); + +/* PEF2256 hardware versions */ +enum pef2256_version { + PEF2256_VERSION_UNKNOWN, + PEF2256_VERSION_1_2, + PEF2256_VERSION_2_1, + PEF2256_VERSION_2_2, +}; + +/* Get the PEF2256 hardware version */ +enum pef2256_version pef2256_get_version(struct pef2256 *pef2256); + +#endif /* __PEF2256_H__ */ diff --git a/include/linux/freelist.h b/include/linux/freelist.h deleted file mode 100644 index fc1842b96469..000000000000 --- a/include/linux/freelist.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ -#ifndef FREELIST_H -#define FREELIST_H - -#include <linux/atomic.h> - -/* - * Copyright: cameron@moodycamel.com - * - * A simple CAS-based lock-free free list. Not the fastest thing in the world - * under heavy contention, but simple and correct (assuming nodes are never - * freed until after the free list is destroyed), and fairly speedy under low - * contention. - * - * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists - */ - -struct freelist_node { - atomic_t refs; - struct freelist_node *next; -}; - -struct freelist_head { - struct freelist_node *head; -}; - -#define REFS_ON_FREELIST 0x80000000 -#define REFS_MASK 0x7FFFFFFF - -static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * Since the refcount is zero, and nobody can increase it once it's - * zero (except us, and we run only one copy of this method per node at - * a time, i.e. the single thread case), then we know we can safely - * change the next pointer of the node; however, once the refcount is - * back above zero, then other threads could increase it (happens under - * heavy contention, when the refcount goes to zero in between a load - * and a refcount increment of a node in try_get, then back up to - * something non-zero, then the refcount increment is done by the other - * thread) -- so if the CAS to add the node to the actual list fails, - * decrese the refcount and leave the add operation to the next thread - * who puts the refcount back to zero (which could be us, hence the - * loop). - */ - struct freelist_node *head = READ_ONCE(list->head); - - for (;;) { - WRITE_ONCE(node->next, head); - atomic_set_release(&node->refs, 1); - - if (!try_cmpxchg_release(&list->head, &head, node)) { - /* - * Hmm, the add failed, but we can only try again when - * the refcount goes back to zero. - */ - if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) - continue; - } - return; - } -} - -static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) -{ - /* - * We know that the should-be-on-freelist bit is 0 at this point, so - * it's safe to set it using a fetch_add. - */ - if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { - /* - * Oh look! We were the last ones referencing this node, and we - * know we want to add it to the free list, so let's do it! - */ - __freelist_add(node, list); - } -} - -static inline struct freelist_node *freelist_try_get(struct freelist_head *list) -{ - struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); - unsigned int refs; - - while (head) { - prev = head; - refs = atomic_read(&head->refs); - if ((refs & REFS_MASK) == 0 || - !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { - head = smp_load_acquire(&list->head); - continue; - } - - /* - * Good, reference count has been incremented (it wasn't at - * zero), which means we can read the next and not worry about - * it changing between now and the time we do the CAS. - */ - next = READ_ONCE(head->next); - if (try_cmpxchg_acquire(&list->head, &head, next)) { - /* - * Yay, got the node. This means it was on the list, - * which means should-be-on-freelist must be false no - * matter the refcount (because nobody else knows it's - * been taken off yet, it can't have been put back on). - */ - WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); - - /* - * Decrease refcount twice, once for our ref, and once - * for the list's ref. - */ - atomic_fetch_add(-2, &head->refs); - - return head; - } - - /* - * OK, the head must have changed on us, but we still need to decrement - * the refcount we increased. - */ - refs = atomic_fetch_add(-1, &prev->refs); - if (refs == REFS_ON_FREELIST + 1) - __freelist_add(prev, list); - } - - return NULL; -} - -#endif /* FREELIST_H */ diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h deleted file mode 100644 index eaa0ac5f9003..000000000000 --- a/include/linux/frontswap.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FRONTSWAP_H -#define _LINUX_FRONTSWAP_H - -#include <linux/swap.h> -#include <linux/mm.h> -#include <linux/bitops.h> -#include <linux/jump_label.h> - -struct frontswap_ops { - void (*init)(unsigned); /* this swap type was just swapon'ed */ - int (*store)(unsigned, pgoff_t, struct page *); /* store a page */ - int (*load)(unsigned, pgoff_t, struct page *, bool *); /* load a page */ - void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */ - void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */ -}; - -int frontswap_register_ops(const struct frontswap_ops *ops); - -extern void frontswap_init(unsigned type, unsigned long *map); -extern int __frontswap_store(struct page *page); -extern int __frontswap_load(struct page *page); -extern void __frontswap_invalidate_page(unsigned, pgoff_t); -extern void __frontswap_invalidate_area(unsigned); - -#ifdef CONFIG_FRONTSWAP -extern struct static_key_false frontswap_enabled_key; - -static inline bool frontswap_enabled(void) -{ - return static_branch_unlikely(&frontswap_enabled_key); -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ - p->frontswap_map = map; -} - -static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return p->frontswap_map; -} -#else -/* all inline routines become no-ops and all externs are ignored */ - -static inline bool frontswap_enabled(void) -{ - return false; -} - -static inline void frontswap_map_set(struct swap_info_struct *p, - unsigned long *map) -{ -} - -static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) -{ - return NULL; -} -#endif - -static inline int frontswap_store(struct page *page) -{ - if (frontswap_enabled()) - return __frontswap_store(page); - - return -1; -} - -static inline int frontswap_load(struct page *page) -{ - if (frontswap_enabled()) - return __frontswap_load(page); - - return -1; -} - -static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) -{ - if (frontswap_enabled()) - __frontswap_invalidate_page(type, offset); -} - -static inline void frontswap_invalidate_area(unsigned type) -{ - if (frontswap_enabled()) - __frontswap_invalidate_area(type); -} - -#endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 562f2623c9c9..8dfd53b52744 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,8 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/maple_tree.h> +#include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -67,7 +69,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_info; +struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -119,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ @@ -309,19 +313,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -338,6 +329,22 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* + * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the + * iocb completion can be passed back to the owner for execution from a safe + * context rather than needing to be punted through a workqueue. If this + * flag is set, the bio completion handling may set iocb->dio_complete to a + * handler function and iocb->private to context information for that handler. + * The issuer should call the handler with that context information from task + * context to complete the processing of the iocb. Note that while this + * provides a task context for the dio_complete() callback, it should only be + * used on the completion side for non-IO generating completions. It's fine to + * call blocking functions from this callback, but they should not wait for + * unrelated IO (like cache flushing, new IO generation, etc). + */ +#define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -351,7 +358,8 @@ enum rw_hint { { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ - { IOCB_ALLOC_CACHE, "ALLOC_CACHE" } + { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } struct kiocb { struct file *ki_filp; @@ -360,7 +368,23 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ - struct wait_page_queue *ki_waitq; /* for async buffered IO */ + union { + /* + * Only used for async buffered reads, where it denotes the + * page waitqueue associated with completing the read. Valid + * IFF IOCB_WAITQ is set. + */ + struct wait_page_queue *ki_waitq; + /* + * Can be used for O_DIRECT IO, where the completion handling + * is punted back to the issuer of the IO. May only be set + * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer + * must then check for presence of this handler when ki_complete + * is invoked. The data passed in to this handler must be + * assigned to ->private when dio_complete is assigned. + */ + ssize_t (*dio_complete)(void *data); + }; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -403,7 +427,7 @@ struct address_space_operations { bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb); - int (*error_remove_page)(struct address_space *, struct page *); + int (*error_remove_folio)(struct address_space *, struct folio *); /* swapfile support */ int (*swap_activate)(struct swap_info_struct *sis, struct file *file, @@ -423,7 +447,7 @@ extern const struct address_space_operations empty_aops; * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. - * @i_mmap_writable: Number of VM_SHARED mappings. + * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. @@ -432,9 +456,9 @@ extern const struct address_space_operations empty_aops; * @a_ops: Methods. * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. - * @private_lock: For use by the owner of the address_space. - * @private_list: For use by the owner of the address_space. - * @private_data: For use by the owner of the address_space. + * @i_private_lock: For use by the owner of the address_space. + * @i_private_list: For use by the owner of the address_space. + * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; @@ -447,15 +471,15 @@ struct address_space { atomic_t nr_thps; #endif struct rb_root_cached i_mmap; - struct rw_semaphore i_mmap_rwsem; unsigned long nrpages; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; errseq_t wb_err; - spinlock_t private_lock; - struct list_head private_list; - void *private_data; + spinlock_t i_private_lock; + struct list_head i_private_list; + struct rw_semaphore i_mmap_rwsem; + void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but @@ -526,7 +550,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap + * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * @@ -640,13 +664,13 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 i_atime; - struct timespec64 i_mtime; - struct timespec64 i_ctime; + struct timespec64 __i_atime; + struct timespec64 __i_mtime; + struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -707,7 +731,7 @@ struct inode { #endif #ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_info *i_crypt_info; + struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY @@ -876,7 +900,8 @@ static inline loff_t i_size_read(const struct inode *inode) preempt_enable(); return i_size; #else - return inode->i_size; + /* Pairs with smp_store_release() in i_size_write() */ + return smp_load_acquire(&inode->i_size); #endif } @@ -898,7 +923,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; preempt_enable(); #else - inode->i_size = i_size; + /* + * Pairs with smp_load_acquire() in i_size_read() to ensure + * changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); #endif } @@ -960,8 +990,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) */ struct file { union { + /* fput() uses task work when closing and freeing file (default). */ + struct callback_head f_task_work; + /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; - struct rcu_head f_rcuhead; unsigned int f_iocb_flags; }; @@ -1011,7 +1043,10 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) + +struct file *get_file_rcu(struct file __rcu **f); +struct file *get_file_active(struct file **f); + #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) @@ -1028,6 +1063,7 @@ static inline struct file *get_file(struct file *f) typedef void *fl_owner_t; struct file_lock; +struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX @@ -1042,9 +1078,20 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +/* + * file_dentry() is a relic from the days that overlayfs was using files with a + * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. + * In those days, file_dentry() was needed to get the underlying fs dentry that + * matches f_inode. + * Files with "fake" path should not exist nowadays, so use an assertion to make + * sure that file_dentry() was not papering over filesystem bugs. + */ static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file)); + struct dentry *dentry = file->f_path.dentry; + + WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); + return dentry; } struct fasync_struct { @@ -1069,7 +1116,7 @@ extern void fasync_free(struct fasync_struct *); extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); +extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); @@ -1088,13 +1135,15 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) -#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */ +#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define SB_DEAD BIT(21) +#define SB_DYING BIT(24) #define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) @@ -1128,11 +1177,13 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 +#define SB_I_EVM_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ +#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ /* Possible states of 'frozen' field */ enum { @@ -1147,7 +1198,9 @@ enum { #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { - int frozen; /* Is sb frozen? */ + unsigned short frozen; /* Is sb frozen? */ + int freeze_kcount; /* How many kernel freeze requests? */ + int freeze_ucount; /* How many userspace freeze requests? */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1172,7 +1225,7 @@ struct super_block { #ifdef CONFIG_SECURITY void *s_security; #endif - const struct xattr_handler **s_xattr; + const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ @@ -1186,7 +1239,8 @@ struct super_block { #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - struct block_device *s_bdev; + struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ + struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; @@ -1212,8 +1266,22 @@ struct super_block { struct fsnotify_mark_connector __rcu *s_fsnotify_marks; #endif + /* + * q: why are s_id and s_sysfs_name not the same? both are human + * readable strings that identify the filesystem + * a: s_id is allowed to change at runtime; it's used in log messages, + * and we want to when a device starts out as single device (s_id is dev + * name) but then a device is hot added and we have to switch to + * identifying it by UUID + * but s_sysfs_name is a handle for programmatic access, and can't + * change at runtime + */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ + u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ + + /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ + char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; @@ -1231,7 +1299,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; @@ -1474,7 +1542,109 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -extern struct timespec64 current_time(struct inode *inode); +struct timespec64 current_time(struct inode *inode); +struct timespec64 inode_set_ctime_current(struct inode *inode); + +static inline time64_t inode_get_atime_sec(const struct inode *inode) +{ + return inode->__i_atime.tv_sec; +} + +static inline long inode_get_atime_nsec(const struct inode *inode) +{ + return inode->__i_atime.tv_nsec; +} + +static inline struct timespec64 inode_get_atime(const struct inode *inode) +{ + return inode->__i_atime; +} + +static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_atime = ts; + return ts; +} + +static inline struct timespec64 inode_set_atime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); +} + +static inline time64_t inode_get_mtime_sec(const struct inode *inode) +{ + return inode->__i_mtime.tv_sec; +} + +static inline long inode_get_mtime_nsec(const struct inode *inode) +{ + return inode->__i_mtime.tv_nsec; +} + +static inline struct timespec64 inode_get_mtime(const struct inode *inode) +{ + return inode->__i_mtime; +} + +static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_mtime = ts; + return ts; +} + +static inline struct timespec64 inode_set_mtime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_mtime_to_ts(inode, ts); +} + +static inline time64_t inode_get_ctime_sec(const struct inode *inode) +{ + return inode->__i_ctime.tv_sec; +} + +static inline long inode_get_ctime_nsec(const struct inode *inode) +{ + return inode->__i_ctime.tv_nsec; +} + +static inline struct timespec64 inode_get_ctime(const struct inode *inode) +{ + return inode->__i_ctime; +} + +static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->__i_ctime = ts; + return ts; +} + +/** + * inode_set_ctime - set the ctime in the inode + * @inode: inode in which to set the ctime + * @sec: tv_sec value to set + * @nsec: tv_nsec value to set + * + * Set the ctime in @inode to { @sec, @nsec } + */ +static inline struct timespec64 inode_set_ctime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + + return inode_set_ctime_to_ts(inode, ts); +} + +struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. @@ -1504,9 +1674,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) +/** + * __sb_write_started - check if sb freeze level is held + * @sb: the super we write to + * @level: the freeze level + * + * * > 0 - sb freeze level is held + * * 0 - sb freeze level is not held + * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN + */ +static inline int __sb_write_started(const struct super_block *sb, int level) +{ + return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); +} + +/** + * sb_write_started - check if SB_FREEZE_WRITE is held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ static inline bool sb_write_started(const struct super_block *sb) { - return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); + return __sb_write_started(sb, SB_FREEZE_WRITE); +} + +/** + * sb_write_not_started - check if SB_FREEZE_WRITE is not held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ +static inline bool sb_write_not_started(const struct super_block *sb) +{ + return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; +} + +/** + * file_write_started - check if SB_FREEZE_WRITE is held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_started(file_inode(file)->i_sb); +} + +/** + * file_write_not_started - check if SB_FREEZE_WRITE is not held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_not_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_not_started(file_inode(file)->i_sb); } /** @@ -1770,6 +2001,7 @@ struct dir_context { struct iov_iter; struct io_uring_cmd; +struct offset_ctx; struct file_operations { struct module *owner; @@ -1798,7 +2030,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); - int (*setlease)(struct file *, long, struct file_lock **, void **); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); @@ -1850,7 +2082,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - int (*update_time)(struct inode *, struct timespec64 *, int); + int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); @@ -1863,6 +2095,7 @@ struct inode_operations { int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); + struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, @@ -1886,9 +2119,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags); int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, @@ -1896,9 +2126,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); @@ -1908,6 +2135,25 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); +/** + * enum freeze_holder - holder of the freeze + * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem + * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem + * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * + * Indicate who the owner of the freeze or thaw request is and whether + * the freeze needs to be exclusive or can nest. + * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the + * same holder aren't allowed. It is however allowed to hold a single + * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at + * the same time. This is relied upon by some filesystems during online + * repair or similar. + */ +enum freeze_holder { + FREEZE_HOLDER_KERNEL = (1U << 0), + FREEZE_HOLDER_USERSPACE = (1U << 1), + FREEZE_MAY_NEST = (1U << 2), +}; struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); @@ -1920,9 +2166,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *); + int (*freeze_super) (struct super_block *, enum freeze_holder who); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *); + int (*thaw_super) (struct super_block *, enum freeze_holder who); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -1935,7 +2181,7 @@ struct super_operations { #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); - struct dquot **(*get_dquots)(struct inode *); + struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); @@ -1998,7 +2244,12 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) + +#ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) +#else +#define IS_POSIXACL(inode) 0 +#endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) @@ -2142,7 +2393,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_FSCACHE_WB (1 << 18) +#define I_PINNING_NETFS_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2200,7 +2451,7 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); +int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { @@ -2273,6 +2524,7 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); +struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ @@ -2296,17 +2548,56 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -extern int freeze_super(struct super_block *super); -extern int thaw_super(struct super_block *super); +int freeze_super(struct super_block *super, enum freeze_holder who); +int thaw_super(struct super_block *super, enum freeze_holder who); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); +static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) +{ + if (WARN_ON(len > sizeof(sb->s_uuid))) + len = sizeof(sb->s_uuid); + sb->s_uuid_len = len; + memcpy(&sb->s_uuid, uuid, len); +} + +/* set sb sysfs name based on sb->s_bdev */ +static inline void super_set_sysfs_name_bdev(struct super_block *sb) +{ + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); +} + +/* set sb sysfs name based on sb->s_uuid */ +static inline void super_set_sysfs_name_uuid(struct super_block *sb) +{ + WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); +} + +/* set sb sysfs name based on sb->s_id */ +static inline void super_set_sysfs_name_id(struct super_block *sb) +{ + strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); +} + +/* try to use something standard before you use this */ +__printf(2, 3) +static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); + va_end(args); +} + extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); -extern int generic_update_time(struct inode *, struct timespec64 *, int); +int inode_update_timestamps(struct inode *inode, int flags); +int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; @@ -2318,13 +2609,13 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } @@ -2363,26 +2654,31 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct file *backing_file_open(const struct path *path, int flags, - const struct path *real_path, - const struct cred *cred); -struct path *backing_file_real_path(struct file *f); +struct path *backing_file_user_path(struct file *f); /* - * file_real_path - get the path corresponding to f_inode - * - * When opening a backing file for a stackable filesystem (e.g., - * overlayfs) f_path may be on the stackable filesystem and f_inode on - * the underlying filesystem. When the path associated with f_inode is - * needed, this helper should be used instead of accessing f_path - * directly. -*/ -static inline const struct path *file_real_path(struct file *f) + * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file + * stored in ->vm_file is a backing file whose f_inode is on the underlying + * filesystem. When the mapped file path and inode number are displayed to + * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the + * path and inode number to display to the user, which is the path of the fd + * that user has requested to map and the inode number that would be returned + * by fstat() on that same fd. + */ +/* Get the path to display in /proc/<pid>/maps */ +static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) - return backing_file_real_path(f); + return backing_file_user_path(f); return &f->f_path; } +/* Get the inode whose inode number to display in /proc/<pid>/maps */ +static inline const struct inode *file_user_inode(struct file *f) +{ + if (unlikely(f->f_mode & FMODE_BACKING)) + return d_inode(backing_file_user_path(f)->dentry); + return file_inode(f); +} static inline struct file *file_clone_open(struct file *file) { @@ -2545,6 +2841,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) return (inode->i_mode ^ mode) & S_IFMT; } +/** + * file_start_write - get write access to a superblock for regular file io + * @file: the file we want to write to + * + * This is a variant of sb_start_write() which is a noop on non-regualr file. + * Should be matched with a call to file_end_write(). + */ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) @@ -2559,11 +2862,53 @@ static inline bool file_start_write_trylock(struct file *file) return sb_start_write_trylock(file_inode(file)->i_sb); } +/** + * file_end_write - drop write access to a superblock of a regular file + * @file: the file we wrote to + * + * Should be matched with a call to file_start_write(). + */ static inline void file_end_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); + sb_end_write(file_inode(file)->i_sb); +} + +/** + * kiocb_start_write - get write access to a superblock for async file io + * @iocb: the io context we want to submit the write with + * + * This is a variant of sb_start_write() for async io submission. + * Should be matched with a call to kiocb_end_write(). + */ +static inline void kiocb_start_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + sb_start_write(inode->i_sb); + /* + * Fool lockdep by telling it the lock got released so that it + * doesn't complain about the held lock when we return to userspace. + */ + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); +} + +/** + * kiocb_end_write - drop write access to a superblock after async file io + * @iocb: the io context we sumbitted the write with + * + * Should be matched with a call to kiocb_start_write(). + */ +static inline void kiocb_end_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission thread. + */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + sb_end_write(inode->i_sb); } /* @@ -2613,8 +2958,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode) #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { - BUG_ON(!atomic_read(&inode->i_readcount)); - atomic_dec(&inode->i_readcount); + BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { @@ -2644,6 +2988,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." + * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -2721,6 +3076,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); +extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); @@ -2789,8 +3145,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags); extern void @@ -2880,7 +3234,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *); +void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -2919,8 +3273,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); -extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); @@ -2940,6 +3292,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, @@ -2956,7 +3310,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -extern int simple_nosetlease(struct file *, long, struct file_lock **, void **); +extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); @@ -2977,12 +3331,38 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); +struct offset_ctx { + struct maple_tree mt; + unsigned long next_offset; +}; + +void simple_offset_init(struct offset_ctx *octx); +int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); +void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_empty(struct dentry *dentry); +int simple_offset_rename_exchange(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry); +void simple_offset_destroy(struct offset_ctx *octx); + +extern const struct file_operations simple_offset_dir_operations; + extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); -extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); +extern void generic_set_sb_d_ops(struct super_block *sb); + +static inline bool sb_has_encoding(const struct super_block *sb) +{ +#if IS_ENABLED(CONFIG_UNICODE) + return !!sb->s_encoding; +#else + return false; +#endif +} int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); @@ -3037,6 +3417,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) + return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) @@ -3047,6 +3429,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; + if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { + if (IS_APPEND(file_inode(ki->ki_filp))) + return -EPERM; + ki->ki_flags &= ~IOCB_APPEND; + } + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index ff6341e09925..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -109,6 +109,7 @@ struct fs_context { bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ bool oldapi:1; /* Coming from mount(2) */ + bool exclusive:1; /* create new superblock, reject existing one */ }; struct fs_context_operations { @@ -135,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); @@ -150,14 +153,13 @@ extern int get_tree_nodev(struct fs_context *fc, extern int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); -extern int get_tree_single_reconf(struct fs_context *fc, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)); extern int get_tree_keyed(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), void *key); +int setup_bdev_super(struct super_block *sb, int sb_flags, + struct fs_context *fc); extern int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h deleted file mode 100644 index 77d783f71527..000000000000 --- a/include/linux/fs_enet_pd.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Platform information definitions for the - * universal Freescale Ethernet driver. - * - * Copyright (c) 2003 Intracom S.A. - * by Pantelis Antoniou <panto@intracom.gr> - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef FS_ENET_PD_H -#define FS_ENET_PD_H - -#include <linux/clk.h> -#include <linux/string.h> -#include <linux/of_mdio.h> -#include <linux/if_ether.h> -#include <asm/types.h> - -#define FS_ENET_NAME "fs_enet" - -enum fs_id { - fsid_fec1, - fsid_fec2, - fsid_fcc1, - fsid_fcc2, - fsid_fcc3, - fsid_scc1, - fsid_scc2, - fsid_scc3, - fsid_scc4, -}; - -#define FS_MAX_INDEX 9 - -static inline int fs_get_fec_index(enum fs_id id) -{ - if (id >= fsid_fec1 && id <= fsid_fec2) - return id - fsid_fec1; - return -1; -} - -static inline int fs_get_fcc_index(enum fs_id id) -{ - if (id >= fsid_fcc1 && id <= fsid_fcc3) - return id - fsid_fcc1; - return -1; -} - -static inline int fs_get_scc_index(enum fs_id id) -{ - if (id >= fsid_scc1 && id <= fsid_scc4) - return id - fsid_scc1; - return -1; -} - -static inline int fs_fec_index2id(int index) -{ - int id = fsid_fec1 + index - 1; - if (id >= fsid_fec1 && id <= fsid_fec2) - return id; - return FS_MAX_INDEX; - } - -static inline int fs_fcc_index2id(int index) -{ - int id = fsid_fcc1 + index - 1; - if (id >= fsid_fcc1 && id <= fsid_fcc3) - return id; - return FS_MAX_INDEX; -} - -static inline int fs_scc_index2id(int index) -{ - int id = fsid_scc1 + index - 1; - if (id >= fsid_scc1 && id <= fsid_scc4) - return id; - return FS_MAX_INDEX; -} - -enum fs_mii_method { - fsmii_fixed, - fsmii_fec, - fsmii_bitbang, -}; - -enum fs_ioport { - fsiop_porta, - fsiop_portb, - fsiop_portc, - fsiop_portd, - fsiop_porte, -}; - -struct fs_mii_bit { - u32 offset; - u8 bit; - u8 polarity; -}; -struct fs_mii_bb_platform_info { - struct fs_mii_bit mdio_dir; - struct fs_mii_bit mdio_dat; - struct fs_mii_bit mdc_dat; - int delay; /* delay in us */ - int irq[32]; /* irqs per phy's */ -}; - -struct fs_platform_info { - - void(*init_ioports)(struct fs_platform_info *); - /* device specific information */ - int fs_no; /* controller index */ - char fs_type[4]; /* controller type */ - - u32 cp_page; /* CPM page */ - u32 cp_block; /* CPM sblock */ - u32 cp_command; /* CPM page/sblock/mcn */ - - u32 clk_trx; /* some stuff for pins & mux configuration*/ - u32 clk_rx; - u32 clk_tx; - u32 clk_route; - u32 clk_mask; - - u32 mem_offset; - u32 dpram_offset; - u32 fcc_regs_c; - - u32 device_flags; - - struct device_node *phy_node; - const struct fs_mii_bus_info *bus_info; - - int rx_ring, tx_ring; /* number of buffers on rx */ - __u8 macaddr[ETH_ALEN]; /* mac address */ - int rx_copybreak; /* limit we copy small frames */ - int napi_weight; /* NAPI weight */ - - int use_rmii; /* use RMII mode */ - int has_phy; /* if the network is phy container as well...*/ - - struct clk *clk_per; /* 'per' clock for register access */ -}; -struct fs_mii_fec_platform_info { - u32 irq[32]; - u32 mii_speed; -}; - -static inline int fs_get_id(struct fs_platform_info *fpi) -{ - if(strstr(fpi->fs_type, "SCC")) - return fs_scc_index2id(fpi->fs_no); - if(strstr(fpi->fs_type, "FCC")) - return fs_fcc_index2id(fpi->fs_no); - if(strstr(fpi->fs_type, "FEC")) - return fs_fec_index2id(fpi->fs_no); - return fpi->fs_no; -} - -#endif diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 54210a42c30d..2b1f74b24070 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -16,15 +16,15 @@ extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src); static inline void fsstack_copy_attr_atime(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); } static inline void fsstack_copy_attr_times(struct inode *dest, const struct inode *src) { - dest->i_atime = src->i_atime; - dest->i_mtime = src->i_mtime; - dest->i_ctime = src->i_ctime; + inode_set_atime_to_ts(dest, inode_get_atime(src)); + inode_set_mtime_to_ts(dest, inode_get_mtime(src)); + inode_set_ctime_to_ts(dest, inode_get_ctime(src)); } #endif /* _LINUX_FS_STACK_H */ diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h deleted file mode 100644 index 36b61ff39277..000000000000 --- a/include/linux/fs_uart_pd.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Platform information definitions for the CPM Uart driver. - * - * 2006 (c) MontaVista Software, Inc. - * Vitaly Bordug <vbordug@ru.mvista.com> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef FS_UART_PD_H -#define FS_UART_PD_H - -#include <asm/types.h> - -enum fs_uart_id { - fsid_smc1_uart, - fsid_smc2_uart, - fsid_scc1_uart, - fsid_scc2_uart, - fsid_scc3_uart, - fsid_scc4_uart, - fs_uart_nr, -}; - -static inline int fs_uart_id_scc2fsid(int id) -{ - return fsid_scc1_uart + id - 1; -} - -static inline int fs_uart_id_fsid2scc(int id) -{ - return id - fsid_scc1_uart + 1; -} - -static inline int fs_uart_id_smc2fsid(int id) -{ - return fsid_smc1_uart + id - 1; -} - -static inline int fs_uart_id_fsid2smc(int id) -{ - return id - fsid_smc1_uart + 1; -} - -struct fs_uart_platform_info { - void(*init_ioports)(struct fs_uart_platform_info *); - /* device specific information */ - int fs_no; /* controller index */ - char fs_type[4]; /* controller type */ - u32 uart_clk; - u8 tx_num_fifo; - u8 tx_buf_size; - u8 rx_num_fifo; - u8 rx_buf_size; - u8 brg; - u8 clk_rx; - u8 clk_tx; -}; - -static inline int fs_uart_get_id(struct fs_uart_platform_info *fpi) -{ - if(strstr(fpi->fs_type, "SMC")) - return fs_uart_id_smc2fsid(fpi->fs_no); - if(strstr(fpi->fs_type, "SCC")) - return fs_uart_id_scc2fsid(fpi->fs_no); - return fpi->fs_no; -} - -#endif diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index a174cedf4d90..bdf7f3eddf0a 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -189,17 +189,20 @@ extern atomic_t fscache_n_write; extern atomic_t fscache_n_no_write_space; extern atomic_t fscache_n_no_create_space; extern atomic_t fscache_n_culled; +extern atomic_t fscache_n_dio_misfit; #define fscache_count_read() atomic_inc(&fscache_n_read) #define fscache_count_write() atomic_inc(&fscache_n_write) #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) #define fscache_count_culled() atomic_inc(&fscache_n_culled) +#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit) #else #define fscache_count_read() do {} while(0) #define fscache_count_write() do {} while(0) #define fscache_count_no_write_space() do {} while(0) #define fscache_count_no_create_space() do {} while(0) #define fscache_count_culled() do {} while(0) +#define fscache_count_dio_misfit() do {} while(0) #endif #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 8e312c8323a8..6e8562cbcc43 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r * indicates the cache resources to which the operation state should be * attached; @cookie indicates the cache object that will be accessed. * - * This is intended to be called from the ->begin_cache_operation() netfs lib - * operation as implemented by the network filesystem. - * * @cres->inval_counter is set from @cookie->inval_counter for comparison at * the end of the operation. This allows invalidation during the operation to * be detected by the caller. @@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } -#if __fscache_available -bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio, - struct fscache_cookie *cookie); -#else -#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \ - filemap_dirty_folio(MAPPING, FOLIO) -#endif - -/** - * fscache_unpin_writeback - Unpin writeback resources - * @wbc: The writeback control - * @cookie: The cookie referring to the cache object - * - * Unpin the writeback resources pinned by fscache_dirty_folio(). This is - * intended to be called by the netfs's ->write_inode() method. - */ -static inline void fscache_unpin_writeback(struct writeback_control *wbc, - struct fscache_cookie *cookie) -{ - if (wbc->unpinned_fscache_wb) - fscache_unuse_cookie(cookie, NULL, NULL); -} - -/** - * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode - * @cookie: The cookie referring to the cache object - * @inode: The inode to clean up - * @aux: Auxiliary data to apply to the inode - * - * Clear any writeback resources held by an inode when the inode is evicted. - * This must be called before clear_inode() is called. - */ -static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, - struct inode *inode, - const void *aux) -{ - if (inode->i_state & I_PINNING_FSCACHE_WB) { - loff_t i_size = i_size_read(inode); - fscache_unuse_cookie(cookie, aux, &i_size); - } -} - /** * fscache_note_page_release - Note that a netfs page got released * @cookie: The cookie corresponding to the file diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c895b12737a1..772f822dc6b8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -31,7 +31,7 @@ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; -struct fscrypt_info; +struct fscrypt_inode_info; struct fs_parameter; struct seq_file; @@ -59,26 +59,55 @@ struct fscrypt_name { #ifdef CONFIG_FS_ENCRYPTION -/* - * If set, the fscrypt bounce page pool won't be allocated (unless another - * filesystem needs it). Set this if the filesystem always uses its own bounce - * pages for writes and therefore won't need the fscrypt bounce page pool. - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - /* Crypto operations for filesystems */ struct fscrypt_operations { - /* Set of optional flags; see above for allowed flags */ - unsigned int flags; + /* + * If set, then fs/crypto/ will allocate a global bounce page pool the + * first time an encryption key is set up for a file. The bounce page + * pool is required by the following functions: + * + * - fscrypt_encrypt_pagecache_blocks() + * - fscrypt_zeroout_range() for files not using inline crypto + * + * If the filesystem doesn't use those, it doesn't need to set this. + */ + unsigned int needs_bounce_pages : 1; /* - * If set, this is a filesystem-specific key description prefix that - * will be accepted for "logon" keys for v1 fscrypt policies, in - * addition to the generic prefix "fscrypt:". This functionality is - * deprecated, so new filesystems shouldn't set this field. + * If set, then fs/crypto/ will allow the use of encryption settings + * that assume inode numbers fit in 32 bits (i.e. + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other + * prerequisites for these settings are also met. This is only useful + * if the filesystem wants to support inline encryption hardware that is + * limited to 32-bit or 64-bit data unit numbers and where programming + * keyslots is very slow. */ - const char *key_prefix; + unsigned int has_32bit_inodes : 1; + + /* + * If set, then fs/crypto/ will allow users to select a crypto data unit + * size that is less than the filesystem block size. This is done via + * the log2_data_unit_size field of the fscrypt policy. This flag is + * not compatible with filesystems that encrypt variable-length blocks + * (i.e. blocks that aren't all equal to filesystem's block size), for + * example as a result of compression. It's also not compatible with + * the fscrypt_encrypt_block_inplace() and + * fscrypt_decrypt_block_inplace() functions. + */ + unsigned int supports_subblock_data_units : 1; + + /* + * This field exists only for backwards compatibility reasons and should + * only be set by the filesystems that are setting it already. It + * contains the filesystem-specific key description prefix that is + * accepted for "logon" keys for v1 fscrypt policies. This + * functionality is deprecated in favor of the generic prefix + * "fscrypt:", which itself is deprecated in favor of the filesystem + * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that + * are newly adding fscrypt support should not set this field. + */ + const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. @@ -146,21 +175,6 @@ struct fscrypt_operations { bool (*has_stable_inodes)(struct super_block *sb); /* - * Get the number of bits that the filesystem uses to represent inode - * numbers and file logical block numbers. - * - * By default, both of these are assumed to be 64-bit. This function - * can be implemented to declare that either or both of these numbers is - * shorter, which may allow the use of the - * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of - * inline crypto hardware whose maximum DUN length is less than 64 bits - * (e.g., eMMC v5.2 spec compliant hardware). This function only needs - * to be implemented if support for one of these features is needed. - */ - void (*get_ino_and_lblk_bits)(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret); - - /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem * only has a single such block device, or an ERR_PTR() on error. @@ -178,7 +192,10 @@ struct fscrypt_operations { unsigned int *num_devs); }; -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); + +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). @@ -206,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) } /* - * When d_splice_alias() moves a directory's no-key alias to its plaintext alias - * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be - * cleared. Note that we don't have to support arbitrary moves of this flag - * because fscrypt doesn't allow no-key names to be the source or target of a - * rename(). + * When d_splice_alias() moves a directory's no-key alias to its + * plaintext alias as a result of the encryption key being added, + * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity + * to disable d_revalidate. Note that we don't have to support the + * inverse operation because fscrypt doesn't allow no-key names to be + * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { - dentry->d_flags &= ~DCACHE_NOKEY_NAME; + /* + * VFS calls fscrypt_handle_d_move even for non-fscrypt + * filesystems. + */ + if (dentry->d_flags & DCACHE_NOKEY_NAME) { + dentry->d_flags &= ~DCACHE_NOKEY_NAME; + + /* + * Other filesystem features might be handling dentry + * revalidation, in which case it cannot be disabled. + */ + if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + } } /** @@ -246,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return dentry->d_flags & DCACHE_NOKEY_NAME; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ + /* + * This code tries to only take ->d_lock when necessary to write + * to ->d_flags. We shouldn't be peeking on d_flags for + * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case + * there is a race, the worst it can happen is that we fail to + * unset DCACHE_OP_REVALIDATE and pay the cost of an extra + * d_revalidate. + */ + if (is_nokey_name) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_NOKEY_NAME; + spin_unlock(&dentry->d_lock); + } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && + dentry->d_op->d_revalidate == fscrypt_d_revalidate) { + /* + * Unencrypted dentries and encrypted dentries where the + * key is available are always valid from fscrypt + * perspective. Avoid the cost of calling + * fscrypt_d_revalidate unnecessarily. + */ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + spin_unlock(&dentry->d_lock); + } +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -353,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); @@ -390,7 +449,8 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { return NULL; } @@ -409,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return false; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { @@ -868,7 +933,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return fscrypt_get_info(inode) != NULL; + return fscrypt_get_inode_info(inode) != NULL; } /** @@ -966,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; + + fscrypt_prepare_dentry(dentry, false); + return 0; } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index ed48e4f1e755..1a9de119a0f7 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,6 +17,12 @@ #include <linux/slab.h> #include <linux/bug.h> +/* Are there any inode/mount/sb objects that are being watched at all? */ +static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +{ + return atomic_long_read(&sb->s_fsnotify_connectors); +} + /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may @@ -30,7 +36,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { - if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); @@ -44,7 +50,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline void fsnotify_inode(struct inode *inode, __u32 mask) { - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) @@ -59,7 +65,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { @@ -96,35 +102,73 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (file->f_mode & FMODE_NONOTIFY) return 0; - /* Overlayfs internal files have fake f_path */ - path = file_real_path(file); + path = &file->f_path; return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } -/* Simple call site for access decisions */ -static inline int fsnotify_perm(struct file *file, int mask) +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * fsnotify_file_area_perm - permission hook before access to file range + */ +static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, + const loff_t *ppos, size_t count) { - int ret; - __u32 fsnotify_mask = 0; + __u32 fsnotify_mask = FS_ACCESS_PERM; + + /* + * filesystem may be modified in the context of permission events + * (e.g. by HSM filling a file on access), so sb freeze protection + * must not be held. + */ + lockdep_assert_once(file_write_not_started(file)); - if (!(mask & (MAY_READ | MAY_OPEN))) + if (!(perm_mask & MAY_READ)) return 0; - if (mask & MAY_OPEN) { - fsnotify_mask = FS_OPEN_PERM; + return fsnotify_file(file, fsnotify_mask); +} + +/* + * fsnotify_file_perm - permission hook before file access + */ +static inline int fsnotify_file_perm(struct file *file, int perm_mask) +{ + return fsnotify_file_area_perm(file, perm_mask, NULL, 0); +} - if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); +/* + * fsnotify_open_perm - permission hook before file open + */ +static inline int fsnotify_open_perm(struct file *file) +{ + int ret; - if (ret) - return ret; - } - } else if (mask & MAY_READ) { - fsnotify_mask = FS_ACCESS_PERM; + if (file->f_flags & __FMODE_EXEC) { + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + if (ret) + return ret; } - return fsnotify_file(file, fsnotify_mask); + return fsnotify_file(file, FS_OPEN_PERM); +} + +#else +static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, + const loff_t *ppos, size_t count) +{ + return 0; +} + +static inline int fsnotify_file_perm(struct file *file, int perm_mask) +{ + return 0; +} + +static inline int fsnotify_open_perm(struct file *file) +{ + return 0; } +#endif /* * fsnotify_link_count - inode's link count changed diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d7d96c806bff..8f40c349b228 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -31,8 +31,8 @@ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ -#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ -#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ @@ -472,10 +472,8 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ -#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ - __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; @@ -530,6 +528,8 @@ struct fsnotify_mark { #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 +#define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 +#define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 unsigned int flags; /* flags [mark->lock] */ }; @@ -760,17 +760,13 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); -/* Get cached fsid of filesystem containing object */ -extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, - __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags, __kernel_fsid_t *fsid); + int add_flags); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags, - __kernel_fsid_t *fsid); + unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, @@ -778,15 +774,14 @@ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, - NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags); } /* given a group and a mark, flag mark to be freed when all references are dropped */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aad9cf8876b5..54d53f345d14 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -862,13 +862,8 @@ extern int skip_trace(unsigned long ip); extern void ftrace_module_init(struct module *mod); extern void ftrace_module_enable(struct module *mod); extern void ftrace_release_mod(struct module *mod); - -extern void ftrace_disable_daemon(void); -extern void ftrace_enable_daemon(void); #else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } -static inline void ftrace_disable_daemon(void) { } -static inline void ftrace_enable_daemon(void) { } static inline void ftrace_module_init(struct module *mod) { } static inline void ftrace_module_enable(struct module *mod) { } static inline void ftrace_release_mod(struct module *mod) { } @@ -1156,7 +1151,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -extern enum ftrace_dump_mode ftrace_dump_on_oops; +#define MAX_TRACER_SIZE 100 +extern char ftrace_dump_on_oops[]; +extern int ftrace_dump_on_oops_enabled(void); extern int tracepoint_printk; extern void disable_trace_on_warning(void); diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h new file mode 100644 index 000000000000..3ff4c277296f --- /dev/null +++ b/include/linux/fw_table.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by + * platform or device firmware + * + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2023 Intel Corp. + */ +#ifndef _FW_TABLE_H_ +#define _FW_TABLE_H_ + +union acpi_subtable_headers; + +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, + const unsigned long end); + +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; + int count; +}; + +union fw_table_header { + struct acpi_table_header acpi; + struct acpi_table_cdat cdat; +}; + +union acpi_subtable_headers { + struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; + struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; + struct acpi_cdat_header cdat; +}; + +int acpi_parse_entries_array(char *id, unsigned long table_size, + union fw_table_header *table_header, + unsigned long max_length, + struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); + +int cdat_table_parse(enum acpi_cdat_type type, + acpi_tbl_entry_handler_arg handler_arg, void *arg, + struct acpi_table_cdat *table_header, + unsigned long length); + +/* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */ +#if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS) +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_fwtbl_lib __init_or_acpilib +#else +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL) +#define __init_or_fwtbl_lib +#endif + +#endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5700451b300f..0d79070c5a70 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -9,10 +9,16 @@ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ -#include <linux/types.h> -#include <linux/list.h> #include <linux/bits.h> #include <linux/err.h> +#include <linux/list.h> +#include <linux/types.h> + +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; struct fwnode_operations; struct device; @@ -41,6 +47,8 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + + /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; @@ -51,8 +59,10 @@ struct fwnode_handle { * fwnode link flags * * CYCLE: The fwnode link is part of a cycle. Don't defer probe. + * IGNORE: Completely ignore this link, even during cycle detection. */ #define FWLINK_FLAG_CYCLE BIT(0) +#define FWLINK_FLAG_IGNORE BIT(1) struct fwnode_link { struct fwnode_handle *supplier; @@ -185,7 +195,6 @@ struct fwnode_operations { if (fwnode_has_op(fwnode, op)) \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ } while (false) -#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) @@ -207,9 +216,10 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } -extern bool fw_devlink_is_strict(void); -int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, + u8 flags); void fwnode_links_purge(struct fwnode_handle *fwnode); void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); +bool fw_devlink_is_strict(void); #endif diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 107613f7d792..f3512fddf3d7 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -5,7 +5,7 @@ * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to - * PAGE_SIZE. + * GENRADIX_NODE_SIZE. * * A genradix is defined with the type it will store, like so: * @@ -38,18 +38,22 @@ #include <asm/page.h> #include <linux/bug.h> +#include <linux/limits.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/types.h> struct genradix_root; +#define GENRADIX_NODE_SHIFT 9 +#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) + struct __genradix { struct genradix_root *root; }; /* - * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ #define __GENRADIX_INITIALIZER \ @@ -100,14 +104,14 @@ void __genradix_free(struct __genradix *); static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) - BUILD_BUG_ON(obj_size > PAGE_SIZE); + BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE); else - BUG_ON(obj_size > PAGE_SIZE); + BUG_ON(obj_size > GENRADIX_NODE_SIZE); if (!is_power_of_2(obj_size)) { - size_t objs_per_page = PAGE_SIZE / obj_size; + size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size; - return (idx / objs_per_page) * PAGE_SIZE + + return (idx / objs_per_page) * GENRADIX_NODE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; @@ -116,6 +120,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_objs_per_page(_radix) \ + (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0])) +#define __genradix_page_remainder(_radix) \ + (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0])) + #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) @@ -179,16 +188,40 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); #define genradix_iter_peek(_iter, _radix) \ (__genradix_cast(_radix) \ __genradix_iter_peek(_iter, &(_radix)->tree, \ - PAGE_SIZE / __genradix_obj_size(_radix))) + __genradix_objs_per_page(_radix))) + +void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, + size_t, size_t); + +/** + * genradix_iter_peek_prev - get first entry at or below iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or below @_iter's current position, returns NULL + */ +#define genradix_iter_peek_prev(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ + __genradix_objs_per_page(_radix), \ + __genradix_obj_size(_radix) + \ + __genradix_page_remainder(_radix))) static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { + if (iter->offset + obj_size < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return; + } + iter->offset += obj_size; if (!is_power_of_2(obj_size) && - (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) - iter->offset = round_up(iter->offset, PAGE_SIZE); + (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE) + iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE); iter->pos++; } @@ -196,6 +229,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_iter_advance(_iter, _radix) \ __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) +static inline void __genradix_iter_rewind(struct genradix_iter *iter, + size_t obj_size) +{ + if (iter->offset == 0 || + iter->offset == SIZE_MAX) { + iter->offset = SIZE_MAX; + return; + } + + if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0) + iter->offset -= GENRADIX_NODE_SIZE % obj_size; + + iter->offset -= obj_size; + iter->pos--; +} + +#define genradix_iter_rewind(_iter, _radix) \ + __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) + #define genradix_for_each_from(_radix, _iter, _p, _start) \ for (_iter = genradix_iter_init(_radix, _start); \ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ @@ -213,6 +265,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, #define genradix_for_each(_radix, _iter, _p) \ genradix_for_each_from(_radix, _iter, _p, 0) +#define genradix_last_pos(_radix) \ + (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1) + +/** + * genradix_for_each_reverse - iterate over entry in a genradix, reverse order + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and @_iter.pos + * will be the current entry's index. + */ +#define genradix_for_each_reverse(_radix, _iter, _p) \ + for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ + (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ + genradix_iter_rewind(&_iter, _radix)) + int __genradix_prealloc(struct __genradix *, size_t, gfp_t); /** diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 2984b0cb24b1..d4da060b7532 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -2,6 +2,7 @@ #ifndef GENL_MAGIC_FUNC_H #define GENL_MAGIC_FUNC_H +#include <linux/args.h> #include <linux/build_bug.h> #include <linux/genl_magic_struct.h> @@ -23,7 +24,7 @@ #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ [tag_name] = { .type = NLA_NESTED }, -static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +static struct nla_policy CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { #include GENL_MAGIC_INCLUDE_FILE }; @@ -209,7 +210,7 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ * Magic: define op number to op name mapping {{{1 * {{{2 */ -static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +static const char *CONCATENATE(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) { switch (cmd) { #undef GENL_op @@ -235,7 +236,7 @@ static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) .cmd = op_name, \ }, -#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +#define ZZZ_genl_ops CONCATENATE(GENL_MAGIC_FAMILY, _genl_ops) static struct genl_ops ZZZ_genl_ops[] __read_mostly = { #include GENL_MAGIC_INCLUDE_FILE }; @@ -248,32 +249,32 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * and provide register/unregister functions. * {{{2 */ -#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +#define ZZZ_genl_family CONCATENATE(GENL_MAGIC_FAMILY, _genl_family) static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper */ -#define ZZZ_genl_mcgrps CONCAT_(GENL_MAGIC_FAMILY, _genl_mcgrps) +#define ZZZ_genl_mcgrps CONCATENATE(GENL_MAGIC_FAMILY, _genl_mcgrps) static const struct genl_multicast_group ZZZ_genl_mcgrps[] = { #undef GENL_mc_group #define GENL_mc_group(group) { .name = #group, }, #include GENL_MAGIC_INCLUDE_FILE }; -enum CONCAT_(GENL_MAGIC_FAMILY, group_ids) { +enum CONCATENATE(GENL_MAGIC_FAMILY, group_ids) { #undef GENL_mc_group -#define GENL_mc_group(group) CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group), +#define GENL_mc_group(group) CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group), #include GENL_MAGIC_INCLUDE_FILE }; #undef GENL_mc_group #define GENL_mc_group(group) \ -static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ +static int CONCATENATE(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ struct sk_buff *skb, gfp_t flags) \ { \ unsigned int group_id = \ - CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group); \ + CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group); \ return genlmsg_multicast(&ZZZ_genl_family, skb, 0, \ group_id, flags); \ } @@ -289,8 +290,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #ifdef GENL_MAGIC_FAMILY_HDRSZ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), #endif - .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, - .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), + .maxattr = ARRAY_SIZE(CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, + .policy = CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy), .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, @@ -299,12 +300,12 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { .module = THIS_MODULE, }; -int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void) { return genl_register_family(&ZZZ_genl_family); } -void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void) { genl_unregister_family(&ZZZ_genl_family); } diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f81d48987528..a419d93789ff 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -14,14 +14,12 @@ # error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" #endif +#include <linux/args.h> #include <linux/genetlink.h> #include <linux/types.h> -#define CONCAT__(a,b) a ## b -#define CONCAT_(a,b) CONCAT__(a,b) - -extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); -extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); +extern int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* * Extension of genl attribute validation policies {{{2 diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 665f06675c83..c775ea3c6015 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -8,6 +8,7 @@ #include <linux/topology.h> struct vm_area_struct; +struct mempolicy; /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); -struct folio *folio_alloc(gfp_t gfp, unsigned order); +struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid); +struct folio *folio_alloc(gfp_t gfp, unsigned int order); struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else @@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid) +{ + return alloc_pages(gfp, order); +} static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); @@ -303,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); @@ -320,11 +336,13 @@ extern void page_frag_free(void *addr); #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); +int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); +void setup_pcp_cacheinfo(unsigned int cpu); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what @@ -343,6 +361,15 @@ static inline bool gfp_has_io_fs(gfp_t gfp) return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS); } +/* + * Check if the gfp flags allow compaction - GFP_NOIO is a really + * tricky context because the migration might require IO. + */ +static inline bool gfp_compaction_allowed(gfp_t gfp_mask) +{ + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); +} + extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 6583a58670c5..13becafe41df 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -2,6 +2,8 @@ #ifndef __LINUX_GFP_TYPES_H #define __LINUX_GFP_TYPES_H +#include <linux/bits.h> + /* The typedef is in types.h but we want the documentation here */ #if 0 /** @@ -21,44 +23,78 @@ typedef unsigned int __bitwise gfp_t; * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif + ___GFP_LAST_BIT +}; + /* Plain integer GFP bitmasks. Do not use this directly. */ -#define ___GFP_DMA 0x01u -#define ___GFP_HIGHMEM 0x02u -#define ___GFP_DMA32 0x04u -#define ___GFP_MOVABLE 0x08u -#define ___GFP_RECLAIMABLE 0x10u -#define ___GFP_HIGH 0x20u -#define ___GFP_IO 0x40u -#define ___GFP_FS 0x80u -#define ___GFP_ZERO 0x100u +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) /* 0x200u unused */ -#define ___GFP_DIRECT_RECLAIM 0x400u -#define ___GFP_KSWAPD_RECLAIM 0x800u -#define ___GFP_WRITE 0x1000u -#define ___GFP_NOWARN 0x2000u -#define ___GFP_RETRY_MAYFAIL 0x4000u -#define ___GFP_NOFAIL 0x8000u -#define ___GFP_NORETRY 0x10000u -#define ___GFP_MEMALLOC 0x20000u -#define ___GFP_COMP 0x40000u -#define ___GFP_NOMEMALLOC 0x80000u -#define ___GFP_HARDWALL 0x100000u -#define ___GFP_THISNODE 0x200000u -#define ___GFP_ACCOUNT 0x400000u -#define ___GFP_ZEROTAGS 0x800000u +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_ZERO 0x1000000u -#define ___GFP_SKIP_KASAN 0x2000000u +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) #else #define ___GFP_SKIP_ZERO 0 #define ___GFP_SKIP_KASAN 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) #else #define ___GFP_NOLOCKDEP 0 #endif -/* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* * Physical address zone modifiers (see linux/mmzone.h - low four bits) @@ -162,25 +198,25 @@ typedef unsigned int __bitwise gfp_t; * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * * The default allocator behavior depends on the request size. We have a concept - * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). * !costly allocations are too essential to fail so they are implicitly * non-failing by default (with some exceptions like OOM victims might fail so * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these - * implicit rules + * implicit rules. * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus * it can sleep). It will avoid disruptive actions like OOM killer. The * caller must handle the failure which is quite likely to happen under * heavy memory pressure. The flag is suitable when failure can easily be - * handled at small cost, such as reduced throughput + * handled at small cost, such as reduced throughput. * * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim * procedures that have previously failed if there is some indication - * that progress has been made else where. It can wait for other - * tasks to attempt high level approaches to freeing memory such as + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as * compaction (which removes fragmentation) and page-out. * There is still a definite limit to the number of retries, but it is * a larger limit than with %__GFP_NORETRY. @@ -230,7 +266,7 @@ typedef unsigned int __bitwise gfp_t; * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting * memory tags at the same time as zeroing memory has minimal additional - * performace impact. + * performance impact. * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by @@ -249,7 +285,7 @@ typedef unsigned int __bitwise gfp_t; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** @@ -274,7 +310,8 @@ typedef unsigned int __bitwise gfp_t; * accounted to kmemcg. * * %GFP_NOWAIT is for kernel allocations that should not stall for direct - * reclaim, start physical IO or use any filesystem callback. + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. * * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. @@ -325,7 +362,7 @@ typedef unsigned int __bitwise gfp_t; #define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) -#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 1c4385a00f88..db2dfbae8edb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); -int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); @@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun return -ENOSYS; } -static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) -{ - /* GPIO can never have been requested */ - WARN_ON(desc); - return -ENOSYS; -} - static inline void gpiod_toggle_active_low(struct gpio_desc *desc) { /* GPIO can never have been requested */ @@ -614,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); - #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ #include <linux/err.h> @@ -633,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } -static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, - char *label) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4f0c5d62c8f3..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,6 +3,8 @@ #define __LINUX_GPIO_DRIVER_H #include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/err.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/irqhandler.h> @@ -333,10 +335,12 @@ struct gpio_irq_chip { * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), * or negative error. It is recommended to always implement this * function, even on input-only or output-only gpio chips. - * @direction_input: configures signal "offset" as input, or returns error - * This can be omitted on input-only or output-only gpio chips. - * @direction_output: configures signal "offset" as output, or returns error - * This can be omitted on input-only or output-only gpio chips. + * @direction_input: configures signal "offset" as input, returns 0 on success + * or a negative error number. This can be omitted on input-only or + * output-only gpio chips. + * @direction_output: configures signal "offset" as output, returns 0 on + * success or a negative error number. This can be omitted on input-only + * or output-only gpio chips. * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error @@ -529,29 +533,64 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned int offset); +char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset); + + +struct _gpiochip_for_each_data { + const char **label; + unsigned int *i; +}; + +DEFINE_CLASS(_gpiochip_for_each_data, + struct _gpiochip_for_each_data, + if (*_T.label) kfree(*_T.label), + ({ + struct _gpiochip_for_each_data _data = { label, i }; + *_data.i = 0; + _data; + }), + const char **label, int *i) + +/** + * for_each_hwgpio - Iterates over all GPIOs for given chip. + * @_chip: Chip to iterate over. + * @_i: Loop counter. + * @_label: Place to store the address of the label if the GPIO is requested. + * Set to NULL for unused GPIOs. + */ +#define for_each_hwgpio(_chip, _i, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + *_data.i < _chip->ngpio; \ + (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ + if (IS_ERR(*_data.label = \ + gpiochip_dup_line_label(_chip, *_data.i))) {} \ + else /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range - * @chip: the chip to query - * @i: loop variable - * @base: first GPIO in the range - * @size: amount of GPIOs to check starting from @base - * @label: label of current GPIO + * @_chip: the chip to query + * @_i: loop variable + * @_base: first GPIO in the range + * @_size: amount of GPIOs to check starting from @base + * @_label: label of current GPIO */ -#define for_each_requested_gpio_in_range(chip, i, base, size, label) \ - for (i = 0; i < size; i++) \ - if ((label = gpiochip_is_requested(chip, base + i)) == NULL) {} else +#define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + *_data.i < _size; \ + (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ + if ((*_data.label = \ + gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ + else if (IS_ERR(*_data.label)) {} \ + else /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -599,13 +638,22 @@ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *gc); -extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +void gpiochip_remove(struct gpio_chip *gc); +int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, + void *data, struct lock_class_key *lock_key, + struct lock_class_key *request_key); + +struct gpio_device *gpio_device_find(const void *data, + int (*match)(struct gpio_chip *gc, + const void *data)); -extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_get(struct gpio_device *gdev); +void gpio_device_put(struct gpio_device *gdev); + +DEFINE_FREE(gpio_device_put, struct gpio_device *, + if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)) + +struct device *gpio_device_to_device(struct gpio_device *gdev); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); @@ -672,25 +720,12 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) -int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq); -void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); - -int gpiochip_irq_domain_activate(struct irq_domain *domain, - struct irq_data *data, bool reserve); -void gpiochip_irq_domain_deactivate(struct irq_domain *domain, - struct irq_data *data); - -bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, - unsigned int offset); - #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); #else #include <asm/bug.h> -#include <asm/errno.h> static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) @@ -758,18 +793,29 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); + +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); - struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); -#else /* CONFIG_GPIOLIB */ +/* struct gpio_device getters */ +int gpio_device_get_base(struct gpio_device *gdev); +const char *gpio_device_get_label(struct gpio_device *gdev); -#include <linux/err.h> +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + +#else /* CONFIG_GPIOLIB */ #include <asm/bug.h> @@ -780,6 +826,36 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h new file mode 100644 index 000000000000..b5a84864650d --- /dev/null +++ b/include/linux/gpio/gpio-nomadik.h @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_NOMADIK_H +#define __LINUX_GPIO_NOMADIK_H + +struct fwnode_handle; + +/* Package definitions */ +#define PINCTRL_NMK_STN8815 0 +#define PINCTRL_NMK_DB8500 1 + +#define GPIO_BLOCK_SHIFT 5 +#define NMK_GPIO_PER_CHIP BIT(GPIO_BLOCK_SHIFT) +#define NMK_MAX_BANKS DIV_ROUND_UP(512, NMK_GPIO_PER_CHIP) + +/* Register in the logic block */ +#define NMK_GPIO_DAT 0x00 +#define NMK_GPIO_DATS 0x04 +#define NMK_GPIO_DATC 0x08 +#define NMK_GPIO_PDIS 0x0c +#define NMK_GPIO_DIR 0x10 +#define NMK_GPIO_DIRS 0x14 +#define NMK_GPIO_DIRC 0x18 +#define NMK_GPIO_SLPC 0x1c +#define NMK_GPIO_AFSLA 0x20 +#define NMK_GPIO_AFSLB 0x24 +#define NMK_GPIO_LOWEMI 0x28 + +#define NMK_GPIO_RIMSC 0x40 +#define NMK_GPIO_FIMSC 0x44 +#define NMK_GPIO_IS 0x48 +#define NMK_GPIO_IC 0x4c +#define NMK_GPIO_RWIMSC 0x50 +#define NMK_GPIO_FWIMSC 0x54 +#define NMK_GPIO_WKS 0x58 +/* These appear in DB8540 and later ASICs */ +#define NMK_GPIO_EDGELEVEL 0x5C +#define NMK_GPIO_LEVEL 0x60 + +/* Pull up/down values */ +enum nmk_gpio_pull { + NMK_GPIO_PULL_NONE, + NMK_GPIO_PULL_UP, + NMK_GPIO_PULL_DOWN, +}; + +/* Sleep mode */ +enum nmk_gpio_slpm { + NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_NOCHANGE, + NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE, +}; + +struct nmk_gpio_chip { + struct gpio_chip chip; + void __iomem *addr; + struct clk *clk; + unsigned int bank; + void (*set_ioforce)(bool enable); + spinlock_t lock; + bool sleepmode; + bool is_mobileye_soc; + /* Keep track of configured edges */ + u32 edge_rising; + u32 edge_falling; + u32 real_wake; + u32 rwimsc; + u32 fwimsc; + u32 rimsc; + u32 fimsc; + u32 pull_up; + u32 lowemi; +}; + +/* Alternate functions: function C is set in hw by setting both A and B */ +#define NMK_GPIO_ALT_GPIO 0 +#define NMK_GPIO_ALT_A 1 +#define NMK_GPIO_ALT_B 2 +#define NMK_GPIO_ALT_C (NMK_GPIO_ALT_A | NMK_GPIO_ALT_B) + +#define NMK_GPIO_ALT_CX_SHIFT 2 +#define NMK_GPIO_ALT_C1 ((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C2 ((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C3 ((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C4 ((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) + +#define PRCM_GPIOCR_ALTCX(pin_num,\ + altc1_used, altc1_ri, altc1_cb,\ + altc2_used, altc2_ri, altc2_cb,\ + altc3_used, altc3_ri, altc3_cb,\ + altc4_used, altc4_ri, altc4_cb)\ +{\ + .pin = pin_num,\ + .altcx[PRCM_IDX_GPIOCR_ALTC1] = {\ + .used = altc1_used,\ + .reg_index = altc1_ri,\ + .control_bit = altc1_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC2] = {\ + .used = altc2_used,\ + .reg_index = altc2_ri,\ + .control_bit = altc2_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC3] = {\ + .used = altc3_used,\ + .reg_index = altc3_ri,\ + .control_bit = altc3_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC4] = {\ + .used = altc4_used,\ + .reg_index = altc4_ri,\ + .control_bit = altc4_cb\ + },\ +} + +/** + * enum prcm_gpiocr_reg_index + * Used to reference an PRCM GPIOCR register address. + */ +enum prcm_gpiocr_reg_index { + PRCM_IDX_GPIOCR1, + PRCM_IDX_GPIOCR2, + PRCM_IDX_GPIOCR3 +}; +/** + * enum prcm_gpiocr_altcx_index + * Used to reference an Other alternate-C function. + */ +enum prcm_gpiocr_altcx_index { + PRCM_IDX_GPIOCR_ALTC1, + PRCM_IDX_GPIOCR_ALTC2, + PRCM_IDX_GPIOCR_ALTC3, + PRCM_IDX_GPIOCR_ALTC4, + PRCM_IDX_GPIOCR_ALTC_MAX, +}; + +/** + * struct prcm_gpio_altcx - Other alternate-C function + * @used: other alternate-C function availability + * @reg_index: PRCM GPIOCR register index used to control the function + * @control_bit: PRCM GPIOCR bit used to control the function + */ +struct prcm_gpiocr_altcx { + bool used:1; + u8 reg_index:2; + u8 control_bit:5; +} __packed; + +/** + * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin + * @pin: The pin number + * @altcx: array of other alternate-C[1-4] functions + */ +struct prcm_gpiocr_altcx_pin_desc { + unsigned short pin; + struct prcm_gpiocr_altcx altcx[PRCM_IDX_GPIOCR_ALTC_MAX]; +}; + +/** + * struct nmk_function - Nomadik pinctrl mux function + * @name: The name of the function, exported to pinctrl core. + * @groups: An array of pin groups that may select this function. + * @ngroups: The number of entries in @groups. + */ +struct nmk_function { + const char *name; + const char * const *groups; + unsigned int ngroups; +}; + +/** + * struct nmk_pingroup - describes a Nomadik pin group + * @grp: Generic data of the pin group (name and pins) + * @altsetting: the altsetting to apply to all pins in this group to + * configure them to be used by a function + */ +struct nmk_pingroup { + struct pingroup grp; + int altsetting; +}; + +#define NMK_PIN_GROUP(a, b) \ + { \ + .grp = PINCTRL_PINGROUP(#a, a##_pins, ARRAY_SIZE(a##_pins)), \ + .altsetting = b, \ + } + +/** + * struct nmk_pinctrl_soc_data - Nomadik pin controller per-SoC configuration + * @pins: An array describing all pins the pin controller affects. + * All pins which are also GPIOs must be listed first within the + * array, and be numbered identically to the GPIO controller's + * numbering. + * @npins: The number of entries in @pins. + * @functions: The functions supported on this SoC. + * @nfunction: The number of entries in @functions. + * @groups: An array describing all pin groups the pin SoC supports. + * @ngroups: The number of entries in @groups. + * @altcx_pins: The pins that support Other alternate-C function on this SoC + * @npins_altcx: The number of Other alternate-C pins + * @prcm_gpiocr_registers: The array of PRCM GPIOCR registers on this SoC + */ +struct nmk_pinctrl_soc_data { + const struct pinctrl_pin_desc *pins; + unsigned int npins; + const struct nmk_function *functions; + unsigned int nfunctions; + const struct nmk_pingroup *groups; + unsigned int ngroups; + const struct prcm_gpiocr_altcx_pin_desc *altcx_pins; + unsigned int npins_altcx; + const u16 *prcm_gpiocr_registers; +}; + +#ifdef CONFIG_PINCTRL_STN8815 + +void nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8500 + +void nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8540 + +void nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +struct platform_device; + +#ifdef CONFIG_DEBUG_FS + +/* + * Symbols declared in gpio-nomadik used by pinctrl-nomadik. If pinctrl-nomadik + * is enabled, then gpio-nomadik is enabled as well; the reverse if not always + * true. + */ +void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, + struct gpio_chip *chip, unsigned int offset, + unsigned int gpio); + +#else + +static inline void nmk_gpio_dbg_show_one(struct seq_file *s, + struct pinctrl_dev *pctldev, + struct gpio_chip *chip, + unsigned int offset, + unsigned int gpio) +{ +} + +#endif + +void __nmk_gpio_make_output(struct nmk_gpio_chip *nmk_chip, + unsigned int offset, int val); +void __nmk_gpio_set_slpm(struct nmk_gpio_chip *nmk_chip, unsigned int offset, + enum nmk_gpio_slpm mode); +struct nmk_gpio_chip *nmk_gpio_populate_chip(struct fwnode_handle *fwnode, + struct platform_device *pdev); + +/* Symbols declared in pinctrl-nomadik used by gpio-nomadik. */ +#ifdef CONFIG_PINCTRL_NOMADIK +extern struct nmk_gpio_chip *nmk_gpio_chips[NMK_MAX_BANKS]; +extern spinlock_t nmk_gpio_slpm_lock; +int __maybe_unused nmk_prcm_gpiocr_get_mode(struct pinctrl_dev *pctldev, + int gpio); +#endif + +#endif /* __LINUX_GPIO_NOMADIK_H */ diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h index 6c75c8bd44a0..1a14e239221f 100644 --- a/include/linux/gpio/property.h +++ b/include/linux/gpio/property.h @@ -2,7 +2,6 @@ #ifndef __LINUX_GPIO_PROPERTY_H #define __LINUX_GPIO_PROPERTY_H -#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ #include <linux/property.h> #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ diff --git a/include/linux/greybus.h b/include/linux/greybus.h index 18c0fb958b74..634c9511cf78 100644 --- a/include/linux/greybus.h +++ b/include/linux/greybus.h @@ -104,44 +104,14 @@ void gb_debugfs_init(void); void gb_debugfs_cleanup(void); struct dentry *gb_debugfs_get(void); -extern struct bus_type greybus_bus_type; - -extern struct device_type greybus_hd_type; -extern struct device_type greybus_module_type; -extern struct device_type greybus_interface_type; -extern struct device_type greybus_control_type; -extern struct device_type greybus_bundle_type; -extern struct device_type greybus_svc_type; - -static inline int is_gb_host_device(const struct device *dev) -{ - return dev->type == &greybus_hd_type; -} - -static inline int is_gb_module(const struct device *dev) -{ - return dev->type == &greybus_module_type; -} - -static inline int is_gb_interface(const struct device *dev) -{ - return dev->type == &greybus_interface_type; -} - -static inline int is_gb_control(const struct device *dev) -{ - return dev->type == &greybus_control_type; -} - -static inline int is_gb_bundle(const struct device *dev) -{ - return dev->type == &greybus_bundle_type; -} - -static inline int is_gb_svc(const struct device *dev) -{ - return dev->type == &greybus_svc_type; -} +extern const struct bus_type greybus_bus_type; + +extern const struct device_type greybus_hd_type; +extern const struct device_type greybus_module_type; +extern const struct device_type greybus_interface_type; +extern const struct device_type greybus_control_type; +extern const struct device_type greybus_bundle_type; +extern const struct device_type greybus_svc_type; static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id) { diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h index aeb8f9243545..820134b0105c 100644 --- a/include/linux/greybus/greybus_protocols.h +++ b/include/linux/greybus/greybus_protocols.h @@ -232,9 +232,7 @@ struct gb_fw_download_fetch_firmware_request { __le32 size; } __packed; -struct gb_fw_download_fetch_firmware_response { - __u8 data[0]; -} __packed; +/* gb_fw_download_fetch_firmware_response contains no other data */ /* firmware download release firmware request */ struct gb_fw_download_release_firmware_request { @@ -414,9 +412,7 @@ struct gb_bootrom_get_firmware_request { __le32 size; } __packed; -struct gb_bootrom_get_firmware_response { - __u8 data[0]; -} __packed; +/* gb_bootrom_get_firmware_response contains no other data */ /* Bootrom protocol Ready to boot request */ struct gb_bootrom_ready_to_boot_request { diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h index 5afaf5f06856..da547fb9071b 100644 --- a/include/linux/greybus/svc.h +++ b/include/linux/greybus/svc.h @@ -100,7 +100,4 @@ bool gb_svc_watchdog_enabled(struct gb_svc *svc); int gb_svc_watchdog_enable(struct gb_svc *svc); int gb_svc_watchdog_disable(struct gb_svc *svc); -int gb_svc_protocol_init(void); -void gb_svc_protocol_exit(void); - #endif /* __SVC_H */ diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h new file mode 100644 index 000000000000..f316c8d0f3fc --- /dev/null +++ b/include/linux/habanalabs/cpucp_if.h @@ -0,0 +1,1423 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020-2023 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include <linux/types.h> +#include <linux/if_ether.h> + +#include "hl_boot_if.h" + +#define NUM_HBM_PSEUDO_CH 2 +#define NUM_HBM_CH_PER_DEV 8 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 + +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, + EQ_EVENT_PWR_MODE_0, + EQ_EVENT_PWR_MODE_1, + EQ_EVENT_PWR_MODE_2, + EQ_EVENT_PWR_MODE_3, + EQ_EVENT_PWR_BRK_ENTRY, + EQ_EVENT_PWR_BRK_EXIT, + EQ_EVENT_HEARTBEAT, +}; + +/* + * info of the pkt queue pointers in the first async occurrence + */ +struct cpucp_pkt_sync_err { + __le32 pi; + __le32 ci; +}; + +struct hl_eq_hbm_ecc_data { + /* SERR counter */ + __le32 sec_cnt; + /* DERR counter */ + __le32 dec_cnt; + /* Supplemental Information according to the mask bits */ + __le32 hbm_ecc_info; + /* Address in hbm where the ecc happened */ + __le32 first_addr; + /* SERR continuous address counter */ + __le32 sec_cont_cnt; + __le32 pad; +}; + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 is_critical; + __le16 block_id; + __u8 pad[4]; +}; + +enum hl_sm_sei_cause { + SM_SEI_SO_OVERFLOW, + SM_SEI_LBW_4B_UNALIGNED, + SM_SEI_AXI_RESPONSE_ERR +}; + +struct hl_eq_sm_sei_data { + __le32 sei_log; + /* enum hl_sm_sei_cause */ + __u8 sei_cause; + __u8 pad[3]; +}; + +enum hl_fw_alive_severity { + FW_ALIVE_SEVERITY_MINOR, + FW_ALIVE_SEVERITY_CRITICAL +}; + +struct hl_eq_fw_alive { + __le64 uptime_seconds; + __le32 process_id; + __le32 thread_id; + /* enum hl_fw_alive_severity */ + __u8 severity; + __u8 pad[7]; +}; + +struct hl_eq_intr_cause { + __le64 intr_cause_data; +}; + +struct hl_eq_pcie_drain_ind_data { + struct hl_eq_intr_cause intr_cause; + __le64 drain_wr_addr_lbw; + __le64 drain_rd_addr_lbw; + __le64 drain_wr_addr_hbw; + __le64 drain_rd_addr_hbw; +}; + +struct hl_eq_razwi_lbw_info_regs { + __le32 rr_aw_razwi_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_reg; + __le32 rr_ar_razwi_id_reg; +}; + +struct hl_eq_razwi_hbw_info_regs { + __le32 rr_aw_razwi_hi_reg; + __le32 rr_aw_razwi_lo_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_hi_reg; + __le32 rr_ar_razwi_lo_reg; + __le32 rr_ar_razwi_id_reg; +}; + +/* razwi_happened masks */ +#define RAZWI_HAPPENED_HBW 0x1 +#define RAZWI_HAPPENED_LBW 0x2 +#define RAZWI_HAPPENED_AW 0x4 +#define RAZWI_HAPPENED_AR 0x8 + +struct hl_eq_razwi_info { + __le32 razwi_happened_mask; + union { + struct hl_eq_razwi_lbw_info_regs lbw; + struct hl_eq_razwi_hbw_info_regs hbw; + }; + __le32 pad; +}; + +struct hl_eq_razwi_with_intr_cause { + struct hl_eq_razwi_info razwi_info; + struct hl_eq_intr_cause intr_cause; +}; + +#define HBM_CA_ERR_CMD_LIFO_LEN 8 +#define HBM_RD_ERR_DATA_LIFO_LEN 8 +#define HBM_WR_PAR_CMD_LIFO_LEN 11 + +enum hl_hbm_sei_cause { + /* Command/address parity error event is split into 2 events due to + * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix + * for even HBM CK_t cycles + */ + HBM_SEI_CMD_PARITY_EVEN, + HBM_SEI_CMD_PARITY_ODD, + /* Read errors can be reflected as a combination of SERR/DERR/parity + * errors. Therefore, we define one event for all read error types. + * LKD will perform further proccessing. + */ + HBM_SEI_READ_ERR, + HBM_SEI_WRITE_DATA_PARITY_ERR, + HBM_SEI_CATTRIP, + HBM_SEI_MEM_BIST_FAIL, + HBM_SEI_DFI, + HBM_SEI_INV_TEMP_READ_OUT, + HBM_SEI_BIST_FAIL, +}; + +/* Masks for parsing hl_hbm_sei_headr fields */ +#define HBM_ECC_SERR_CNTR_MASK 0xFF +#define HBM_ECC_DERR_CNTR_MASK 0xFF00 +#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 + +/* HBM index and MC index are known by the event_id */ +struct hl_hbm_sei_header { + union { + /* relevant only in case of HBM read error */ + struct { + __u8 ecc_serr_cnt; + __u8 ecc_derr_cnt; + __u8 read_par_cnt; + __u8 reserved; + }; + /* All other cases */ + __le32 cnt; + }; + __u8 sei_cause; /* enum hl_hbm_sei_cause */ + __u8 mc_channel; /* range: 0-3 */ + __u8 mc_pseudo_channel; /* range: 0-7 */ + __u8 is_critical; +}; + +#define HBM_RD_ADDR_SID_SHIFT 0 +#define HBM_RD_ADDR_SID_MASK 0x1 +#define HBM_RD_ADDR_BG_SHIFT 1 +#define HBM_RD_ADDR_BG_MASK 0x6 +#define HBM_RD_ADDR_BA_SHIFT 3 +#define HBM_RD_ADDR_BA_MASK 0x18 +#define HBM_RD_ADDR_COL_SHIFT 5 +#define HBM_RD_ADDR_COL_MASK 0x7E0 +#define HBM_RD_ADDR_ROW_SHIFT 11 +#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 + +struct hbm_rd_addr { + union { + /* bit fields are only for FW use */ + struct { + u32 dbg_rd_err_addr_sid:1; + u32 dbg_rd_err_addr_bg:2; + u32 dbg_rd_err_addr_ba:2; + u32 dbg_rd_err_addr_col:6; + u32 dbg_rd_err_addr_row:15; + u32 reserved:6; + }; + __le32 rd_addr_val; + }; +}; + +#define HBM_RD_ERR_BEAT_SHIFT 2 +/* dbg_rd_err_misc fields: */ +/* Read parity is calculated per DW on every beat */ +#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 +#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 +#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 +#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 +/* ECC is calculated per PC on every beat */ +#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 +#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 +#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 +#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 + +struct hl_eq_hbm_sei_read_err_intr_info { + /* DFI_RD_ERR_REP_ADDR */ + struct hbm_rd_addr dbg_rd_err_addr; + /* DFI_RD_ERR_REP_ERR */ + union { + struct { + /* bit fields are only for FW use */ + u32 dbg_rd_err_par:8; + u32 dbg_rd_err_par_data:8; + u32 dbg_rd_err_serr:4; + u32 dbg_rd_err_derr:4; + u32 reserved:8; + }; + __le32 dbg_rd_err_misc; + }; + /* DFI_RD_ERR_REP_DM */ + __le32 dbg_rd_err_dm; + /* DFI_RD_ERR_REP_SYNDROME */ + __le32 dbg_rd_err_syndrome; + /* DFI_RD_ERR_REP_DATA */ + __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; +}; + +struct hl_eq_hbm_sei_ca_par_intr_info { + /* 14 LSBs */ + __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; + /* 18 LSBs */ + __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; +}; + +#define WR_PAR_LAST_CMD_COL_SHIFT 0 +#define WR_PAR_LAST_CMD_COL_MASK 0x3F +#define WR_PAR_LAST_CMD_BG_SHIFT 6 +#define WR_PAR_LAST_CMD_BG_MASK 0xC0 +#define WR_PAR_LAST_CMD_BA_SHIFT 8 +#define WR_PAR_LAST_CMD_BA_MASK 0x300 +#define WR_PAR_LAST_CMD_SID_SHIFT 10 +#define WR_PAR_LAST_CMD_SID_MASK 0x400 + +/* Row address isn't latched */ +struct hbm_sei_wr_cmd_address { + /* DFI_DERR_LAST_CMD */ + union { + struct { + /* bit fields are only for FW use */ + u32 col:6; + u32 bg:2; + u32 ba:2; + u32 sid:1; + u32 reserved:21; + }; + __le32 dbg_wr_cmd_addr; + }; +}; + +struct hl_eq_hbm_sei_wr_par_intr_info { + /* entry 0: WR command address from the 1st cycle prior to the error + * entry 1: WR command address from the 2nd cycle prior to the error + * and so on... + */ + struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; + /* derr[0:1] - 1st HBM cycle DERR output + * derr[2:3] - 2nd HBM cycle DERR output + */ + __u8 dbg_derr; + /* extend to reach 8B */ + __u8 pad[3]; +}; + +/* + * this struct represents the following sei causes: + * command parity, ECC double error, ECC single error, dfi error, cattrip, + * temperature read-out, read parity error and write parity error. + * some only use the header while some have extra data. + */ +struct hl_eq_hbm_sei_data { + struct hl_hbm_sei_header hdr; + union { + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; + struct hl_eq_hbm_sei_read_err_intr_info read_err_info; + struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; + }; +}; + +/* Engine/farm arc interrupt type */ +enum hl_engine_arc_interrupt_type { + /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ + ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 +}; + +/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ +struct hl_engine_arc_dccm_queue_full_irq { + /* Queue index value which caused DCCM QUEUE FULL */ + __le32 queue_index; + __le32 pad; +}; + +/* Data structure specifies details of QM/FARM ARC interrupt */ +struct hl_eq_engine_arc_intr_data { + /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ + __le32 engine_id; + __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ + /* More info related to the interrupt e.g. queue index + * incase of DCCM_QUEUE_FULL interrupt. + */ + __le64 payload; + __le64 pad[5]; +}; + +#define ADDR_DEC_ADDRESS_COUNT_MAX 4 + +/* Data structure specifies details of ADDR_DEC interrupt */ +struct hl_eq_addr_dec_intr_data { + struct hl_eq_intr_cause intr_cause; + __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; + __u8 addr_cnt; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + __le64 data_placeholder; + struct hl_eq_ecc_data ecc_data; + struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */ + struct hl_eq_sm_sei_data sm_sei_data; + struct cpucp_pkt_sync_err pkt_sync_err; + struct hl_eq_fw_alive fw_alive; + struct hl_eq_intr_cause intr_cause; + struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; + struct hl_eq_razwi_info razwi_info; + struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; + struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ + struct hl_eq_engine_arc_intr_data arc_data; + struct hl_eq_addr_dec_intr_data addr_dec; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 + +#define EQ_CTL_INDEX_SHIFT 0 +#define EQ_CTL_INDEX_MASK 0x0000FFFF + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, + PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, + PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CPU CP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also include + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually send after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_NIC_INFO_GET - + * Fetch information from the device regarding the NIC. the host's driver + * passes the max size it allows the CpuCP to write to the structure, to + * prevent data corruption in case of mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_PCIE_THROUGHPUT_GET - + * Get throughput of PCIe. + * The packet's arguments specify the transaction direction (TX/RX). + * The window measurement is 10[msec], and the return value is in KB/sec. + * + * CPUCP_PACKET_PCIE_REPLAY_CNT_GET + * Replay count measures number of "replay" events, which is basicly + * number of retries done by PCIe. + * + * CPUCP_PACKET_TOTAL_ENERGY_GET - + * Total Energy is measurement of energy from the time FW Linux + * is loaded. It is calculated by multiplying the average power + * by time (passed from armcp start). The units are in MilliJouls. + * + * CPUCP_PACKET_PLL_INFO_GET - + * Fetch frequencies of PLL from the required PLL IP. + * The packet's arguments specify the device PLL type + * Pll type is the PLL from device pll_index enum. + * The result is composed of 4 outputs, each is 16-bit + * frequency in MHz. + * + * CPUCP_PACKET_POWER_GET - + * Fetch the present power consumption of the device (Current * Voltage). + * + * CPUCP_PACKET_NIC_PFC_SET - + * Enable/Disable the NIC PFC feature. The packet's arguments specify the + * NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_FAULT_GET - + * Fetch the current indication for local/remote faults from the NIC MAC. + * The result is 32-bit value of the relevant register. + * + * CPUCP_PACKET_NIC_LPBK_SET - + * Enable/Disable the MAC loopback feature. The packet's arguments specify + * the NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_MAC_INIT - + * Configure the NIC MAC channels. The packet's arguments specify the + * NIC port and the speed. + * + * CPUCP_PACKET_MSI_INFO_SET - + * set the index number for each supported msi type going from + * host to device + * + * CPUCP_PACKET_NIC_XPCS91_REGS_GET - + * Fetch the un/correctable counters values from the NIC MAC. + * + * CPUCP_PACKET_NIC_STAT_REGS_GET - + * Fetch various NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_CLR - + * Clear the various NIC MAC counters in the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - + * Fetch all NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_IS_IDLE_CHECK - + * Check if the device is IDLE in regard to the DMA/compute engines + * and QMANs. The f/w will return a bitmask where each bit represents + * a different engine or QMAN according to enum cpucp_idle_mask. + * The bit will be 1 if the engine is NOT idle. + * + * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - + * Fetch all HBM replaced-rows and prending to be replaced rows data. + * + * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - + * Fetch status of HBM rows pending replacement and need a reboot to + * be replaced. + * + * CPUCP_PACKET_POWER_SET - + * Resets power history of device to 0 + * + * CPUCP_PACKET_ENGINE_CORE_ASID_SET - + * Packet to perform engine core ASID configuration + * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestaion data that is collected during various stages of the + * boot sequence. the attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * public key and certificate also provided as part of the FW response. + * + * CPUCP_PACKET_INFO_SIGNED_GET - + * Get the device information signed by the Trusted Platform device. + * device info data is also hashed with some unique number (nonce) provided + * by the host to prevent replay attacks. public key and certificate also + * provided as part of the FW response. + * + * CPUCP_PACKET_MONITOR_DUMP_GET - + * Get monitors registers dump from the CpuCP kernel. + * The CPU will put the registers dump in the a buffer allocated by the driver + * which address is passed via the CpuCp packet. In addition, the host's driver + * passes the max size it allows the CpuCP to write to the structure, to prevent + * data corruption in case of mismatched driver/FW versions. + * Obsolete. + * + * CPUCP_PACKET_GENERIC_PASSTHROUGH - + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. + * + * CPUCP_PACKET_SOFT_RESET - + * Packet to perform soft-reset. + * + * CPUCP_PACKET_INTS_REGISTER - + * Packet to inform FW that queues have been established and LKD is ready to receive + * EQ events. + */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_PACKET_NIC_INFO_GET, /* internal */ + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ + CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS, /* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ + CPUCP_PACKET_NIC_PFC_SET, /* internal */ + CPUCP_PACKET_NIC_FAULT_GET, /* internal */ + CPUCP_PACKET_NIC_LPBK_SET, /* internal */ + CPUCP_PACKET_NIC_MAC_CFG, /* internal */ + CPUCP_PACKET_MSI_INFO_SET, /* internal */ + CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ + CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ + CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ + CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ + CPUCP_PACKET_POWER_SET, /* internal */ + CPUCP_PACKET_RESERVED, /* not used */ + CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ + CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ + CPUCP_PACKET_INFO_SIGNED_GET, /* internal */ + CPUCP_PACKET_RESERVED4, /* not used */ + CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_RESERVED11, /* not used */ + CPUCP_PACKET_RESERVED12, /* internal */ + CPUCP_PACKET_RESERVED13, /* internal */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull +#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 +#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull +#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 +#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull + +#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull + +#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull + +/* heartbeat status bits */ +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + /* + * In legacy implemetations, i2c_len was not present, + * was unused and just added as pad. + * So if i2c_len is 0, it is treated as legacy + * and r/w 1 Byte, else if i2c_len is specified, + * its treated as new multibyte r/w support. + */ + __u8 i2c_len; + }; + + struct {/* For PLL info fetch */ + __le16 pll_type; + /* TODO pll_reg is kept temporary before removal */ + __le16 pll_reg; + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data/NIC info */ + __le32 data_max_size; + + /* + * For any general status bitmask. Shall be used whenever the + * result cannot be used to hold general purpose data. + */ + __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; + }; + + union { + /* For NIC requests */ + __le32 port_index; + + /* For Generic packet sub index */ + __le32 pkt_subidx; + }; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[]; +}; + +struct cpucp_nic_status_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +struct cpucp_array_data_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +enum cpucp_led_index { + CPUCP_LED0_INDEX = 0, + CPUCP_LED1_INDEX, + CPUCP_LED2_INDEX, + CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX +}; + +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support first generation platforms. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. + */ +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_min = 4, + cpucp_temp_min_hyst, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_lowest = 21, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23, + cpucp_temp_warn = 24, + cpucp_temp_max_crit = 25, + cpucp_temp_max_warn = 26, +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_lowest = 6, + cpucp_in_highest = 7, + cpucp_in_reset_history, + cpucp_in_intr_alarm_a, + cpucp_in_intr_alarm_b, +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_lowest = 6, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_reg_attributes { + cpucp_pll_nr_reg, + cpucp_pll_nf_reg, + cpucp_pll_od_reg, + cpucp_pll_div_factor_reg, + cpucp_pll_div_sel_reg +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_type_attributes { + cpucp_pll_cpu, + cpucp_pll_pci, +}; + +/* + * cpucp_power_type aligns with hwmon_power_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_power_type { + CPUCP_POWER_INPUT = 8, + CPUCP_POWER_INPUT_HIGHEST = 9, + CPUCP_POWER_RESET_INPUT_HISTORY = 11 +}; + +/* + * MSI type enumeration table for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). + * Changing the order of entries or removing entries is not allowed. + */ +enum cpucp_msi_type { + CPUCP_EVENT_QUEUE_MSI_TYPE, + CPUCP_NIC_PORT1_MSI_TYPE, + CPUCP_NIC_PORT3_MSI_TYPE, + CPUCP_NIC_PORT5_MSI_TYPE, + CPUCP_NIC_PORT7_MSI_TYPE, + CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_EVENT_QUEUE_ERR_MSI_TYPE, + CPUCP_NUM_OF_MSI_TYPES +}; + +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_PLL = 9, + HBM_PLL = 10, + MSS_PLL = 11, + DDR_PLL = 12, + VID_PLL = 13, + BANK_PLL = 14, + MMU_PLL = 15, + IC_PLL = 16, + MC_PLL = 17, + EMMC_PLL = 18, + D2D_PLL = 19, + CS_PLL = 20, + C2C_PLL = 21, + NCH_PLL = 22, + C2M_PLL = 23, + PLL_MAX +}; + +enum rl_index { + TPC_RL = 0, + MME_RL, + EDMA_RL, +}; + +enum pvt_index { + PVT_SW, + PVT_SE, + PVT_NW, + PVT_NE +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define CPUCP_MAX_SENSORS 128 +#define CPUCP_MAX_NICS 128 +#define CPUCP_LANES_PER_NIC 4 +#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 +#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) +#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) +#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) +#define CPUCP_HBM_ROW_REPLACE_MAX 32 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * struct cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card. + * @cpucp_card_type_pmc: PCI Mezzanine Card. + */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 +#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 + +#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 +#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 + +#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 +#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 + +/** + * struct cpucp_security_info - Security information. + * @config: configuration bit field + * @keys_num: number of stored keys + * @revoked_keys: revoked keys bit field + * @min_svn: minimal security version + */ +struct cpucp_security_info { + __u8 config; + __u8 keys_num; + __u8 revoked_keys; + __u8 min_svn; +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance + * (0 = functional, 1 = binned) + * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance + * (0 = functional, 1 = binned), maximum 1 per dcore + * @sram_binning: Categorize SRAM functionality + * (0 = fully functional, 1 = lower-half is not functional, + * 2 = upper-half is not functional) + * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. + * @mme_binning_mask: MME binning mask, + * bits [0:6] <==> dcore0 mme fma + * bits [7:13] <==> dcore1 mme fma + * bits [14:20] <==> dcore0 mme ima + * bits [21:27] <==> dcore1 mme ima + * For each group, if the 6th bit is set then first 5 bits + * represent the col's idx [0-31], otherwise these bits are + * ignored, and col idx 32 is binned. 7th bit is don't care. + * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance + * (0 = functional 1 = binned) + * @memory_repair_flag: eFuse flag indicating memory repair + * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance + * (0 = functional 1 = binned) + * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance + * (0 = functional 1 = binned) + * @interposer_version: Interposer version programmed in eFuse + * @substrate_version: Substrate version programmed in eFuse + * @eq_health_check_supported: eq health check feature supported in FW. + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. + * @fw_os_version: Firmware OS Version + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le32 infineon_second_stage_version; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; + __le64 tpc_binning_mask; + __le64 decoder_binning_mask; + __u8 sram_binning; + __u8 dram_binning_mask; + __u8 memory_repair_flag; + __u8 edma_binning_mask; + __u8 xbar_binning_mask; + __u8 interposer_version; + __u8 substrate_version; + __u8 eq_health_check_supported; + struct cpucp_security_info sec_info; + __le32 fw_hbm_region_size; + __u8 pll_map[PLL_MAP_LEN]; + __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; +}; + +struct cpucp_mac_addr { + __u8 mac_addr[ETH_ALEN]; +}; + +enum cpucp_serdes_type { + TYPE_1_SERDES_TYPE, + TYPE_2_SERDES_TYPE, + HLS1_SERDES_TYPE, + HLS1H_SERDES_TYPE, + HLS2_SERDES_TYPE, + HLS2_TYPE_1_SERDES_TYPE, + MAX_NUM_SERDES_TYPE, /* number of types */ + UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ +}; + +struct cpucp_nic_info { + struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; + __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; + __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; + __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le16 serdes_type; /* enum cpucp_serdes_type */ + __le16 tx_swap_map[CPUCP_MAX_NICS]; + __u8 reserved[6]; +}; + +#define PAGE_DISCARD_MAX 64 + +struct page_discard_info { + __u8 num_entries; + __u8 reserved[7]; + __le32 mmu_page_idx[PAGE_DISCARD_MAX]; +}; + +/* + * struct frac_val - fracture value represented by "integer.frac". + * @integer: the integer part of the fracture value; + * @frac: the fracture part of the fracture value. + */ +struct frac_val { + union { + struct { + __le16 integer; + __le16 frac; + }; + __le32 val; + }; +}; + +/* + * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". + * @integer: the integer part of the SER value; + * @exp: the exponent part of the SER value. + */ +struct ser_val { + __le16 integer; + __le16 exp; +}; + +/* + * struct cpucp_nic_status - describes the status of a NIC port. + * @port: NIC port index. + * @bad_format_cnt: e.g. CRC. + * @responder_out_of_sequence_psn_cnt: e.g NAK. + * @high_ber_reinit_cnt: link reinit due to high BER. + * @correctable_err_cnt: e.g. bit-flip. + * @uncorrectable_err_cnt: e.g. MAC errors. + * @retraining_cnt: re-training counter. + * @up: is port up. + * @pcs_link: has PCS link. + * @phy_ready: is PHY ready. + * @auto_neg: is Autoneg enabled. + * @timeout_retransmission_cnt: timeout retransmission events. + * @high_ber_cnt: high ber events. + * @pre_fec_ser: pre FEC SER value. + * @post_fec_ser: post FEC SER value. + * @throughput: measured throughput. + * @latency: measured latency. + */ +struct cpucp_nic_status { + __le32 port; + __le32 bad_format_cnt; + __le32 responder_out_of_sequence_psn_cnt; + __le32 high_ber_reinit; + __le32 correctable_err_cnt; + __le32 uncorrectable_err_cnt; + __le32 retraining_cnt; + __u8 up; + __u8 pcs_link; + __u8 phy_ready; + __u8 auto_neg; + __le32 timeout_retransmission_cnt; + __le32 high_ber_cnt; + struct ser_val pre_fec_ser; + struct ser_val post_fec_ser; + struct frac_val bandwidth; + struct frac_val lat; +}; + +enum cpucp_hbm_row_replace_cause { + REPLACE_CAUSE_DOUBLE_ECC_ERR, + REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, +}; + +struct cpucp_hbm_row_info { + __u8 hbm_idx; + __u8 pc; + __u8 sid; + __u8 bank_idx; + __le16 row_addr; + __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ + __u8 pad; +}; + +struct cpucp_hbm_row_replaced_rows_info { + __le16 num_replaced_rows; + __u8 pad[6]; + struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; +}; + +enum cpu_reset_status { + CPU_RST_STATUS_NA = 0, + CPU_RST_STATUS_SOFT_RST_DONE = 1, +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +#define DCORE_MON_REGS_SZ 512 +/* + * struct dcore_monitor_regs_data - DCORE monitor regs data. + * the structure follows sync manager block layout. Obsolete. + * @mon_pay_addrl: array of payload address low bits. + * @mon_pay_addrh: array of payload address high bits. + * @mon_pay_data: array of payload data. + * @mon_arm: array of monitor arm. + * @mon_status: array of monitor status. + */ +struct dcore_monitor_regs_data { + __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; + __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; + __le32 mon_pay_data[DCORE_MON_REGS_SZ]; + __le32 mon_arm[DCORE_MON_REGS_SZ]; + __le32 mon_status[DCORE_MON_REGS_SZ]; +}; + +/* contains SM data for each SYNC_MNGR (Obsolete) */ +struct cpucp_monitor_dump { + struct dcore_monitor_regs_data sync_mngr_w_s; + struct dcore_monitor_regs_data sync_mngr_e_s; + struct dcore_monitor_regs_data sync_mngr_w_n; + struct dcore_monitor_regs_data sync_mngr_e_n; +}; + +/* + * The Type of the generic request (and other input arguments) will be fetched from user by reading + * from "pkt_subidx" field in struct cpucp_packet. + * + * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + */ +enum hl_passthrough_type { + HL_PASSTHROUGH_VERSIONS, +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h new file mode 100644 index 000000000000..93366d5621fd --- /dev/null +++ b/include/linux/habanalabs/hl_boot_if.h @@ -0,0 +1,792 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HL_BOOT_IF_H +#define HL_BOOT_IF_H + +#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ +#define HL_POWER9_HOST_MAGIC 0x1DA30009 + +#define BOOT_FIT_SRAM_OFFSET 0x200000 + +#define VERSION_MAX_LEN 128 + +enum cpu_boot_err { + CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, + CPU_BOOT_ERR_FIT_CORRUPTED = 1, + CPU_BOOT_ERR_TS_INIT_FAIL = 2, + CPU_BOOT_ERR_DRAM_SKIPPED = 3, + CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, + CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, + CPU_BOOT_ERR_NIC_FW_FAIL = 6, + CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, + CPU_BOOT_ERR_SECURITY_FAIL = 8, + CPU_BOOT_ERR_EFUSE_FAIL = 9, + CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, + CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, + CPU_BOOT_ERR_PLL_FAIL = 12, + CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, + CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, + CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, + CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23, + CPU_BOOT_ERR_ENABLED = 31, + CPU_BOOT_ERR_SCND_EN = 63, + CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * Mask for fatal failures + * This mask contains all possible fatal failures, and a dynamic code + * will clear the non-relevant ones. + */ +#define CPU_BOOT_ERR_FATAL_MASK \ + ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ + (1 << CPU_BOOT_ERR_PLL_FAIL) | \ + (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ + (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ + (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ + (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ + (1 << CPU_BOOT_ERR_EEPROM_FAIL)) + +/* + * CPU error bits in BOOT_ERROR registers + * + * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. + * DRAM is not reliable to use. + * + * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the + * image provided by the host has failed. + * + * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. + * Boot continues as usual, but keep in + * mind this is a warning. + * + * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. + * Skipping DRAM initialization has been + * requested (e.g. strap, command, etc.) + * and FW skipped the DRAM initialization. + * Host can initialize the DRAM. + * + * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. + * Meaning the BMC data might not be + * available until reset. + * + * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. + * BMC has not provided the NIC data yet. + * Once provided this bit will be cleared. + * + * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. + * The NIC FW loading and initialization + * failed. This means NICs are not usable. + * + * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been + * started, but is not ready yet - chip + * cannot be accessed. + * + * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. + * The tasks are security init (root of + * trust), boot authentication (chain of + * trust), data packets authentication. + * + * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. + * The PCI device ID might be wrong. + * + * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. + * It mean that ppboot checksum + * verification for the preboot primary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. + * It mean that ppboot checksum + * verification for the preboot secondary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one + * of the PLLs remains in REF_CLK + * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * + * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during + * the execution of ppboot or preboot. + * for example: stack overflow. + * + * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning + * malfunctioning components might still be + * in use. + * + * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. + * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature + * sensor. + * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * + * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm + * memories. Boot disabled until reset. + * + * CPU_BOOT_ERR0_ENABLED Error registers enabled. + * This is a main indication that the + * running FW populates the error + * registers. Meaning the error bits are + * not garbage, but actual error statuses. + */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) +#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) +#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) +#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) +#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) +#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) +#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) +#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) +#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) +#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) + +enum cpu_boot_dev_sts { + CPU_BOOT_DEV_STS_SECURITY_EN = 0, + CPU_BOOT_DEV_STS_DEBUG_EN = 1, + CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, + CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, + CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, + CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, + CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, + CPU_BOOT_DEV_STS_RL_EN = 7, + CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, + CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, + CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, + CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, + CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, + CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, + CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, + CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, + CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, + CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, + CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, + CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, + CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, + CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, + CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, + CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, + CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, + CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, + CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_ENABLED = 31, + CPU_BOOT_DEV_STS_SCND_EN = 63, + CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers + * + * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. + * This is an indication for security + * enabled in FW, which means that + * all conditions for security are met: + * device is indicated as security enabled, + * registers are protected, and device + * uses keys for image verification. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. + * Enabled when JTAG or DEBUG is enabled + * in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. + * Watchdog is enabled in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. + * DRAM initialization has been done in FW. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. + * If set, it means that during boot, + * FW waited for BMC data. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. + * FW initialized E2E credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. + * FW initialized HBM credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. + * FW initialized rate limiter. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. + * FW initialized SRAM scrambler. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. + * FW initialized DRAM scrambler. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. + * FW has the hard reset procedure + * implemented. This means that FW will + * perform hard reset procedure on + * receiving the halt-machine event. + * Initialized in: preboot, u-boot, linux + * + * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available + * for use. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. + * FW handles HBM ECC indications. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd + * is set to the PI counter. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * + * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and + * any access to them is done via the FW. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from + * previleged entity. FW sets this status + * bit for host. If this bit is set then + * GIC can not be accessed from host. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running + * index for each new event sent to host. + * This is used as a method in host to + * identify that the waiting event in + * queue is actually a new event which + * was not served before. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to + * prevent IRQs overriding each other. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN + * NIC STAT and XPCS91 access is restricted + * and is done via FW only. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN + * NIC STAT get all is supported. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN + * F/W checks if the device is idle by reading defined set + * of registers. It returns a bitmask of all the engines, + * where a bit is set if the engine is not idle. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MAP_HWMON_EN + * If set, means f/w supports proprietary + * HWMON enum mapping to cpucp enums. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. + * This is a main indication that the + * running FW populates the device status + * register. Meaning the device status + * bits are not garbage, but actual + * statuses. + * Initialized in: preboot + * + */ +#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) +#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) +#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) +#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) +#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) +#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) +#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) +#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) +#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) +#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) +#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) +#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) +#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) +#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) +#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) +#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) +#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) + +enum cpu_boot_status { + CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + /* U-Boot console prompt activated, commands are not processed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + /* Finished NICs init, reported after DRAM and NICs */ + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + /* Last boot loader progress status, ready to receive commands */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, + /* Internal Boot finished, ready for boot-fit */ + CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, + /* Internal Security has been initialized, device can be accessed */ + CPU_BOOT_STATUS_SECURITY_READY = 17, + /* FW component is preparing to shutdown and communication with host is not available */ + CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18, +}; + +enum kmd_msg { + KMD_MSG_NA = 0, + KMD_MSG_GOTO_WFE, + KMD_MSG_FIT_RDY, + KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, + KMD_MSG_LAST +}; + +enum cpu_msg_status { + CPU_MSG_CLR = 0, + CPU_MSG_OK, + CPU_MSG_ERR, +}; + +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + __le32 cpu_pq_base_addr_low; + __le32 cpu_pq_base_addr_high; + __le32 cpu_pq_length; + __le32 cpu_pq_init_status; + __le32 cpu_eq_base_addr_low; + __le32 cpu_eq_base_addr_high; + __le32 cpu_eq_length; + __le32 cpu_eq_ci; + __le32 cpu_cq_base_addr_low; + __le32 cpu_cq_base_addr_high; + __le32 cpu_cq_length; + __le32 cpu_pf_pq_pi; + __le32 cpu_boot_dev_sts0; + __le32 cpu_boot_dev_sts1; + __le32 cpu_boot_err0; + __le32 cpu_boot_err1; + __le32 cpu_boot_status; + __le32 fw_upd_sts; + __le32 fw_upd_cmd; + __le32 fw_upd_pending_sts; + __le32 fuse_ver_offset; + __le32 preboot_ver_offset; + __le32 uboot_ver_offset; + __le32 hw_state; + __le32 kmd_msg_to_cpu; + __le32 cpu_cmd_status_to_host; + __le32 gic_host_pi_upd_irq; + __le32 gic_tpc_qm_irq_ctrl; + __le32 gic_mme_qm_irq_ctrl; + __le32 gic_dma_qm_irq_ctrl; + __le32 gic_nic_qm_irq_ctrl; + __le32 gic_dma_core_irq_ctrl; + __le32 gic_host_halt_irq; + __le32 gic_host_ints_irq; + __le32 gic_host_soft_rst_irq; + __le32 gic_rot_qm_irq_ctrl; + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ +}; + +/* TODO: remove the desc magic after the code is updated to use message */ +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC 0x4843444D +#define HL_COMMS_DESC_VER 3 + +/* HCMv - Habana Communications Message + header version */ +#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 +#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 +#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF + +#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) +#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC +#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) +#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) +#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) + +#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 + +#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ + (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ + HL_COMMS_MSG_MAGIC_VALUE) + +#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ + (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) + +#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ + (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ + HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) + +enum comms_msg_type { + HL_COMMS_DESC_TYPE = 0, + HL_COMMS_RESET_CAUSE_TYPE = 1, + HL_COMMS_FW_CFG_SKIP_TYPE = 2, + HL_COMMS_BINNING_CONF_TYPE = 3, +}; + +/* + * Binning information shared between LKD and FW + * @tpc_mask_l - TPC binning information lower 64 bit + * @dec_mask - Decoder binning information + * @dram_mask - DRAM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @rot_mask - Rotator binning information + * @xbar_mask - xBAR binning information + * @reserved - reserved field for future binning info w/o ABI change + * @tpc_mask_h - TPC binning information upper 64 bit + * @nic_mask - NIC binning information + */ +struct lkd_fw_binning_info { + __le64 tpc_mask_l; + __le32 dec_mask; + __le32 dram_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 rot_mask; + __le32 xbar_mask; + __le32 reserved0; + __le64 tpc_mask_h; + __le64 nic_mask; + __le32 reserved1[8]; +}; + +/* TODO: remove this struct after the code is updated to use message */ +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the descriptor w/o header */ + __le16 size; /* size of the descriptor w/o header */ + __u8 version; /* descriptor version */ + __u8 reserved[5]; /* pad to 64 bit */ +}; + +/* this is the comms message header - meta data */ +struct comms_msg_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the message w/o header */ + __le16 size; /* size of the message w/o header */ + __u8 version; /* message payload version */ + __u8 type; /* message type */ + __u8 reserved[4]; /* pad to 64 bit */ +}; + +enum lkd_fw_ascii_msg_lvls { + LKD_FW_ASCII_MSG_ERR = 0, + LKD_FW_ASCII_MSG_WRN = 1, + LKD_FW_ASCII_MSG_INF = 2, + LKD_FW_ASCII_MSG_DBG = 3, +}; + +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ + +struct lkd_fw_ascii_msg { + __u8 valid; + __u8 msg_lvl; + __u8 reserved[6]; + char msg[LKD_FW_ASCII_MSG_MAX_LEN]; +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + __le64 img_addr; /* address for next FW component load */ + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ + char reserved1[4]; +}; + +enum comms_reset_cause { + HL_RESET_CAUSE_UNKNOWN = 0, + HL_RESET_CAUSE_HEARTBEAT = 1, + HL_RESET_CAUSE_TDR = 2, +}; + +/* TODO: remove define after struct name is aligned on all projects */ +#define lkd_msg_comms lkd_fw_comms_msg + +/* this is the comms message descriptor */ +struct lkd_fw_comms_msg { + struct comms_msg_header header; + /* union for future expantions of new messages */ + union { + struct { + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + /* address for next FW component load */ + __le64 img_addr; + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + /* reserved memory size [MB] for FW/SVE */ + __le32 rsvd_mem_size_mb; + char reserved1[4]; + }; + struct { + __u8 reset_cause; + }; + struct { + __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ + }; + struct lkd_fw_binning_info binning_conf; + }; +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually prepare a + * space for it and in the status response provide + * the descriptor offset. The Offset of the next FW + * data component is a part of the descriptor + * structure. + * + * COMMS_DATA_RDY The FW data has been uploaded and is ready for + * validation. + * + * COMMS_EXEC Execute the next FW component. + * + * COMMS_RST_DEV Reset the device. + * + * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure + * devices. + * + * COMMS_SKIP_BMC Perform actions required for BMC-less servers. + * Do not wait for BMC response. + * + * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory + * space is allocated in a ELBI access only + * address range. + * + */ +enum comms_cmd { + COMMS_NOOP = 0, + COMMS_CLR_STS = 1, + COMMS_RST_STATE = 2, + COMMS_PREP_DESC = 3, + COMMS_DATA_RDY = 4, + COMMS_EXEC = 5, + COMMS_RST_DEV = 6, + COMMS_GOTO_WFE = 7, + COMMS_SKIP_BMC = 8, + COMMS_PREP_DESC_ELBI = 10, + COMMS_INVLD_LAST +}; + +#define COMMS_COMMAND_SIZE_SHIFT 0 +#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF +#define COMMS_COMMAND_CMD_SHIFT 27 +#define COMMS_COMMAND_CMD_MASK 0xF8000000 + +/* + * LKD command to FW register structure + * @size - FW component size + * @cmd - command from enum comms_cmd + */ +struct comms_command { + union { /* bit fields are only for FW use */ + struct { + u32 size :25; /* 32MB max. */ + u32 reserved :2; + enum comms_cmd cmd :5; /* 32 commands */ + }; + __le32 val; + }; +}; + +/* + * FW status + * + * COMMS_STS_NOOP Used to clear the status register and no actual + * status is provided. + * + * COMMS_STS_ACK Command has been received and recognized. + * + * COMMS_STS_OK Command execution has finished successfully. + * + * COMMS_STS_ERR Command execution was unsuccessful and resulted + * in error. + * + * COMMS_STS_VALID_ERR FW validation has failed. + * + * COMMS_STS_TIMEOUT_ERR Command execution has timed out. + */ +enum comms_sts { + COMMS_STS_NOOP = 0, + COMMS_STS_ACK = 1, + COMMS_STS_OK = 2, + COMMS_STS_ERR = 3, + COMMS_STS_VALID_ERR = 4, + COMMS_STS_TIMEOUT_ERR = 5, + COMMS_STS_INVLD_LAST +}; + +/* RAM types for FW components loading - defines the base address */ +enum comms_ram_types { + COMMS_SRAM = 0, + COMMS_DRAM = 1, +}; + +#define COMMS_STATUS_OFFSET_SHIFT 0 +#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF +#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 +#define COMMS_STATUS_RAM_TYPE_SHIFT 26 +#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 +#define COMMS_STATUS_STATUS_SHIFT 28 +#define COMMS_STATUS_STATUS_MASK 0xF0000000 + +/* + * FW status to LKD register structure + * @offset - an offset from the base of the ram_type shifted right by + * 2 bits (always aligned to 32 bits). + * Allows a maximum addressable offset of 256MB from RAM base. + * Example: for real offset in RAM of 0x800000 (8MB), the value + * in offset field is (0x800000 >> 2) = 0x200000. + * @ram_type - the RAM type that should be used for offset from + * enum comms_ram_types + * @status - status from enum comms_sts + */ +struct comms_status { + union { /* bit fields are only for FW use */ + struct { + u32 offset :26; + enum comms_ram_types ram_type :2; + enum comms_sts status :4; /* 16 statuses */ + }; + __le32 val; + }; +}; + +#define NAME_MAX_LEN 32 /* bytes */ +struct hl_module_data { + __u8 name[NAME_MAX_LEN]; + __u8 version[VERSION_MAX_LEN]; +}; + +/** + * struct hl_component_versions - versions associated with hl component. + * @struct_size: size of all the struct (including dynamic size of modules). + * @modules_offset: offset of the modules field in this struct. + * @component: version of the component itself. + * @fw_os: Firmware OS Version. + * @comp_name: Name of the component. + * @modules_counter: number of set bits in modules_mask. + * @reserved: reserved for future use. + * @modules: versions of the component's modules. Elborated explanation in + * struct cpucp_versions. + */ +struct hl_component_versions { + __le16 struct_size; + __le16 modules_offset; + __u8 component[VERSION_MAX_LEN]; + __u8 fw_os[VERSION_MAX_LEN]; + __u8 comp_name[NAME_MAX_LEN]; + __u8 modules_counter; + __u8 reserved[3]; + struct hl_module_data modules[]; +}; + +/* Max size of fit size */ +#define HL_FW_VERSIONS_FIT_SIZE 4096 + +#endif /* HL_BOOT_IF_H */ diff --git a/include/linux/hid-roccat.h b/include/linux/hid-roccat.h index 3214fb0815fc..753654fff07f 100644 --- a/include/linux/hid-roccat.h +++ b/include/linux/hid-roccat.h @@ -16,7 +16,7 @@ #ifdef __KERNEL__ -int roccat_connect(struct class *klass, struct hid_device *hid, +int roccat_connect(const struct class *klass, struct hid_device *hid, int report_size); void roccat_disconnect(int minor); int roccat_report_event(int minor, u8 const *data); diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 13b1e65fbdcc..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,10 @@ #define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 diff --git a/include/linux/hid.h b/include/linux/hid.h index 39e21e3815ad..b12cb1c8e682 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -341,6 +341,29 @@ struct hid_item { */ #define MAX_USBHID_BOOT_QUIRKS 4 +/** + * DOC: HID quirks + * | @HID_QUIRK_NOTOUCH: + * | @HID_QUIRK_IGNORE: ignore this device + * | @HID_QUIRK_NOGET: + * | @HID_QUIRK_HIDDEV_FORCE: + * | @HID_QUIRK_BADPAD: + * | @HID_QUIRK_MULTI_INPUT: + * | @HID_QUIRK_HIDINPUT_FORCE: + * | @HID_QUIRK_ALWAYS_POLL: + * | @HID_QUIRK_INPUT_PER_APP: + * | @HID_QUIRK_X_INVERT: + * | @HID_QUIRK_Y_INVERT: + * | @HID_QUIRK_SKIP_OUTPUT_REPORTS: + * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID: + * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: + * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: + * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: + * | @HID_QUIRK_FULLSPEED_INTERVAL: + * | @HID_QUIRK_NO_INIT_REPORTS: + * | @HID_QUIRK_NO_IGNORE: + * | @HID_QUIRK_NO_INPUT_SYNC: + */ /* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */ #define HID_QUIRK_NOTOUCH BIT(1) #define HID_QUIRK_IGNORE BIT(2) @@ -360,6 +383,7 @@ struct hid_item { #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) +#define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) @@ -555,9 +579,9 @@ struct hid_input { struct hid_report *report; struct input_dev *input; const char *name; - bool registered; struct list_head reports; /* the list of reports */ unsigned int application; /* application usage for this input */ + bool registered; }; enum hid_type { @@ -655,14 +679,17 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + struct kref ref; unsigned int id; /* system unique id */ -#ifdef CONFIG_BPF +#ifdef CONFIG_HID_BPF struct hid_bpf bpf; /* hid-bpf data */ -#endif /* CONFIG_BPF */ +#endif /* CONFIG_HID_BPF */ }; +void hiddev_free(struct kref *ref); + #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) @@ -809,11 +836,11 @@ struct hid_driver { void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -#ifdef CONFIG_PM + int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); -#endif + /* private: */ struct device_driver driver; }; @@ -885,7 +912,7 @@ extern bool hid_ignore(struct hid_device *); extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); -extern struct bus_type hid_bus_type; +extern const struct bus_type hid_bus_type; extern int __must_check __hid_register_driver(struct hid_driver *, struct module *, const char *mod_name); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index e9afb61e6ee0..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ @@ -115,7 +104,7 @@ struct hid_bpf_ops { size_t len, enum hid_report_type rtype, enum hid_class_request reqtype); struct module *owner; - struct bus_type *bus_type; + const struct bus_type *bus_type; }; extern struct hid_bpf_ops *hid_bpf_ops; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 68da30625a6c..00341b56d291 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -440,6 +440,140 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) } /** + * memcpy_from_folio - Copy a range of bytes from a folio. + * @to: The memory to copy to. + * @folio: The folio to read from. + * @offset: The first byte in the folio to read. + * @len: The number of bytes to copy. + */ +static inline void memcpy_from_folio(char *to, struct folio *folio, + size_t offset, size_t len) +{ + VM_BUG_ON(offset + len > folio_size(folio)); + + do { + const char *from = kmap_local_folio(folio, offset); + size_t chunk = len; + + if (folio_test_highmem(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); + kunmap_local(from); + + to += chunk; + offset += chunk; + len -= chunk; + } while (len > 0); +} + +/** + * memcpy_to_folio - Copy a range of bytes to a folio. + * @folio: The folio to write to. + * @offset: The first byte in the folio to store to. + * @from: The memory to copy from. + * @len: The number of bytes to copy. + */ +static inline void memcpy_to_folio(struct folio *folio, size_t offset, + const char *from, size_t len) +{ + VM_BUG_ON(offset + len > folio_size(folio)); + + do { + char *to = kmap_local_folio(folio, offset); + size_t chunk = len; + + if (folio_test_highmem(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); + kunmap_local(to); + + from += chunk; + offset += chunk; + len -= chunk; + } while (len > 0); + + flush_dcache_folio(folio); +} + +/** + * folio_zero_tail - Zero the tail of a folio. + * @folio: The folio to zero. + * @offset: The byte offset in the folio to start zeroing at. + * @kaddr: The address the folio is currently mapped to. + * + * If you have already used kmap_local_folio() to map a folio, written + * some data to it and now need to zero the end of the folio (and flush + * the dcache), you can use this function. If you do not have the + * folio kmapped (eg the folio has been partially populated by DMA), + * use folio_zero_range() or folio_zero_segment() instead. + * + * Return: An address which can be passed to kunmap_local(). + */ +static inline __must_check void *folio_zero_tail(struct folio *folio, + size_t offset, void *kaddr) +{ + size_t len = folio_size(folio) - offset; + + if (folio_test_highmem(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { + memset(kaddr, 0, max); + kunmap_local(kaddr); + len -= max; + offset += max; + max = PAGE_SIZE; + kaddr = kmap_local_folio(folio, offset); + } + } + + memset(kaddr, 0, len); + flush_dcache_folio(folio); + + return kaddr; +} + +/** + * folio_fill_tail - Copy some data to a folio and pad with zeroes. + * @folio: The destination folio. + * @offset: The offset into @folio at which to start copying. + * @from: The data to copy. + * @len: How many bytes of data to copy. + * + * This function is most useful for filesystems which support inline data. + * When they want to copy data from the inode into the page cache, this + * function does everything for them. It supports large folios even on + * HIGHMEM configurations. + */ +static inline void folio_fill_tail(struct folio *folio, size_t offset, + const char *from, size_t len) +{ + char *to = kmap_local_folio(folio, offset); + + VM_BUG_ON(offset + len > folio_size(folio)); + + if (folio_test_highmem(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { + memcpy(to, from, max); + kunmap_local(to); + len -= max; + from += max; + offset += max; + max = PAGE_SIZE; + to = kmap_local_folio(folio, offset); + } + } + + memcpy(to, from, len); + to = folio_zero_tail(folio, offset + len, to + len); + kunmap_local(to); +} + +/** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. * @folio: The folio to copy from. @@ -507,10 +641,24 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } -static inline void unmap_and_put_page(struct page *page, void *addr) +/** + * folio_release_kmap - Unmap a folio and drop a refcount. + * @folio: The folio to release. + * @addr: The address previously returned by a call to kmap_local_folio(). + * + * It is common, eg in directory handling to kmap a folio. This function + * unmaps the folio and drops the refcount that was being held to keep the + * folio alive while we accessed it. + */ +static inline void folio_release_kmap(struct folio *folio, void *addr) { kunmap_local(addr); - put_page(page); + folio_put(folio); +} + +static inline void unmap_and_put_page(struct page *page, void *addr) +{ + folio_release_kmap(page_folio(page), addr); } #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index a7d54d4d41fd..9d7754ad5e9b 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -43,6 +43,7 @@ #define QM_MB_CMD_CQC_BT 0x5 #define QM_MB_CMD_SQC_VFT_V2 0x6 #define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_FLUSH_QM 0x9 #define QM_MB_CMD_SRC 0xc #define QM_MB_CMD_DST 0xd @@ -104,21 +105,17 @@ enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, - QM_FLR, + QM_DOWN, }; enum qm_state { - QM_INIT = 0, - QM_START, - QM_CLOSE, + QM_WORK = 0, QM_STOP, }; enum qp_state { - QP_INIT = 1, - QP_START, + QP_START = 1, QP_STOP, - QP_CLOSE, }; enum qm_hw_ver { @@ -144,15 +141,33 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_misc_ctl_bits { + QM_DRIVER_REMOVING = 0x0, + QM_RST_SCHED, + QM_RESETTING, + QM_MODULE_PARAM, +}; + enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, QM_SUPPORT_STOP_QP, + QM_SUPPORT_STOP_FUNC, QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, }; +struct qm_dev_alg { + u64 alg_msk; + const char *alg; +}; + +struct qm_dev_dfx { + u32 dev_state; + u32 dev_timeout; +}; + struct dfx_diff_registers { u32 *regs; u32 reg_offset; @@ -181,6 +196,7 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + struct qm_dev_dfx dev_dfx; unsigned int *qm_last_words; /* ACC engines recoreding last regs */ unsigned int *last_words; @@ -258,6 +274,16 @@ struct hisi_qm_cap_info { u32 v3_val; }; +struct hisi_qm_cap_record { + u32 type; + u32 cap_val; +}; + +struct hisi_qm_cap_tables { + struct hisi_qm_cap_record *qm_cap_table; + struct hisi_qm_cap_record *dev_cap_table; +}; + struct hisi_qm_list { struct mutex lock; struct list_head list; @@ -269,6 +295,7 @@ struct hisi_qm_poll_data { struct hisi_qm *qm; struct work_struct work; u16 *qp_finish_id; + u16 eqe_num; }; /** @@ -285,6 +312,18 @@ struct qm_err_isolate { struct list_head qm_hw_errs; }; +struct qm_rsv_buf { + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqc *eqc; + struct qm_aeqc *aeqc; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqc_dma; + dma_addr_t aeqc_dma; + struct qm_dma qcdma; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -317,6 +356,7 @@ struct hisi_qm { dma_addr_t cqc_dma; dma_addr_t eqe_dma; dma_addr_t aeqe_dma; + struct qm_rsv_buf xqc_buf; struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; @@ -344,7 +384,6 @@ struct hisi_qm { struct work_struct rst_work; struct work_struct cmd_process; - const char *algs; bool use_sva; resource_size_t phys_base; @@ -355,6 +394,8 @@ struct hisi_qm { u32 mb_qos; u32 type_rate; struct qm_err_isolate isolate_data; + + struct hisi_qm_cap_tables cap_tables; }; struct hisi_qp_status { @@ -471,12 +512,26 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) mutex_init(&qm_list->lock); } +static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); -int hisi_qm_stop_qp(struct hisi_qp *qp); +void hisi_qm_stop_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); @@ -516,8 +571,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); void hisi_qm_pm_uninit(struct hisi_qm *qm); @@ -528,6 +583,8 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); u32 hisi_qm_get_hw_info(struct hisi_qm *qm, const struct hisi_qm_cap_info *info_table, u32 index, bool is_read); +int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs, + u32 dev_algs_size); /* Used by VFIO ACC live migration driver */ struct pci_driver *hisi_sec_get_pf_driver(void); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f10..aa1e65ccb615 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -13,16 +13,12 @@ #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> -#include <linux/rbtree.h> +#include <linux/hrtimer_types.h> #include <linux/init.h> #include <linux/list.h> -#include <linux/percpu.h> -#include <linux/seqlock.h> +#include <linux/percpu-defs.h> +#include <linux/rbtree.h> #include <linux/timer.h> -#include <linux/timerqueue.h> - -struct hrtimer_clock_base; -struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: @@ -60,14 +56,6 @@ enum hrtimer_mode { }; /* - * Return values for the callback function - */ -enum hrtimer_restart { - HRTIMER_NORESTART, /* Timer is not restarted */ - HRTIMER_RESTART, /* Timer must be restarted */ -}; - -/* * Values to track state of the timer * * Possible states: @@ -95,38 +83,6 @@ enum hrtimer_restart { #define HRTIMER_STATE_ENQUEUED 0x01 /** - * struct hrtimer - the basic hrtimer structure - * @node: timerqueue node, which also manages node.expires, - * the absolute expiry time in the hrtimers internal - * representation. The time is related to the clock on - * which the timer is based. Is setup by adding - * slack to the _softexpires value. For non range timers - * identical to _softexpires. - * @_softexpires: the absolute earliest expiry time of the hrtimer. - * The time which was given as expiry time when the timer - * was armed. - * @function: timer expiry callback function - * @base: pointer to the timer base (per cpu and per clock) - * @state: state information (See bit values above) - * @is_rel: Set if the timer was armed relative - * @is_soft: Set if hrtimer will be expired in soft interrupt context. - * @is_hard: Set if hrtimer will be expired in hard interrupt context - * even on RT. - * - * The hrtimer structure must be initialized by hrtimer_init() - */ -struct hrtimer { - struct timerqueue_node node; - ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); - struct hrtimer_clock_base *base; - u8 state; - u8 is_rel; - u8 is_soft; - u8 is_hard; -}; - -/** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure * @task: task to wake up @@ -138,105 +94,6 @@ struct hrtimer_sleeper { struct task_struct *task; }; -#ifdef CONFIG_64BIT -# define __hrtimer_clock_base_align ____cacheline_aligned -#else -# define __hrtimer_clock_base_align -#endif - -/** - * struct hrtimer_clock_base - the timer base for a specific clock - * @cpu_base: per cpu clock base - * @index: clock type index for per_cpu support when moving a - * timer to a base on another cpu. - * @clockid: clock id for per_cpu support - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer - * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock - * @offset: offset of this clock to the monotonic base - */ -struct hrtimer_clock_base { - struct hrtimer_cpu_base *cpu_base; - unsigned int index; - clockid_t clockid; - seqcount_raw_spinlock_t seq; - struct hrtimer *running; - struct timerqueue_head active; - ktime_t (*get_time)(void); - ktime_t offset; -} __hrtimer_clock_base_align; - -enum hrtimer_base_type { - HRTIMER_BASE_MONOTONIC, - HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_TAI, - HRTIMER_BASE_MONOTONIC_SOFT, - HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, - HRTIMER_BASE_TAI_SOFT, - HRTIMER_MAX_CLOCK_BASES, -}; - -/** - * struct hrtimer_cpu_base - the per cpu clock bases - * @lock: lock protecting the base and associated clock bases - * and timers - * @cpu: cpu number - * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set_seq: Sequence counter of clock was set events - * @hres_active: State of high resolution mode - * @in_hrtirq: hrtimer_interrupt() is currently executing - * @hang_detected: The last hrtimer interrupt detected a hang - * @softirq_activated: displays, if the softirq is raised - update of softirq - * related settings is not required then. - * @nr_events: Total number of hrtimer interrupt events - * @nr_retries: Total number of hrtimer interrupt retries - * @nr_hangs: Total number of hrtimer interrupt hangs - * @max_hang_time: Maximum time spent in hrtimer_interrupt - * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are - * expired - * @timer_waiters: A hrtimer_cancel() invocation waits for the timer - * callback to finish. - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue; it is the total first expiry time (hard - * and soft hrtimer are taken into account) - * @next_timer: Pointer to the first expiring timer - * @softirq_expires_next: Time to check, if soft queues needs also to be expired - * @softirq_next_timer: Pointer to the first expiring softirq based timer - * @clock_base: array of clock bases for this cpu - * - * Note: next_timer is just an optimization for __remove_hrtimer(). - * Do not dereference the pointer because it is not reliable on - * cross cpu removals. - */ -struct hrtimer_cpu_base { - raw_spinlock_t lock; - unsigned int cpu; - unsigned int active_bases; - unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1, - softirq_activated : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int nr_events; - unsigned short nr_retries; - unsigned short nr_hangs; - unsigned int max_hang_time; -#endif -#ifdef CONFIG_PREEMPT_RT - spinlock_t softirq_expiry_lock; - atomic_t timer_waiters; -#endif - ktime_t expires_next; - struct hrtimer *next_timer; - ktime_t softirq_expires_next; - struct hrtimer *softirq_next_timer; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -} ____cacheline_aligned; - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; @@ -485,20 +342,12 @@ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** - * hrtimer_forward_now - forward the timer expiry so it expires after now + * hrtimer_forward_now() - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * - * Forward the timer expiry so it will expire after the current time - * of the hrtimer clock base. Returns the number of overruns. - * - * Can be safely called from the callback function of @timer. If - * called from other contexts @timer must neither be enqueued nor - * running the callback and the caller needs to take care of - * serialization. - * - * Note: This only updates the timer expiry value and does not requeue - * the timer. + * It is a variant of hrtimer_forward(). The timer will expire after the current + * time of the hrtimer clock base. See hrtimer_forward() for details. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) @@ -531,9 +380,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 2d3e3c5fb946..c3b4b7ed7c16 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -3,6 +3,8 @@ #define _LINUX_HRTIMER_DEFS_H #include <linux/ktime.h> +#include <linux/timerqueue.h> +#include <linux/seqlock.h> #ifdef CONFIG_HIGH_RES_TIMERS @@ -24,4 +26,106 @@ #endif +#ifdef CONFIG_64BIT +# define __hrtimer_clock_base_align ____cacheline_aligned +#else +# define __hrtimer_clock_base_align +#endif + +/** + * struct hrtimer_clock_base - the timer base for a specific clock + * @cpu_base: per cpu clock base + * @index: clock type index for per_cpu support when moving a + * timer to a base on another cpu. + * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer + * @active: red black tree root node for the active timers + * @get_time: function to retrieve the current time of the clock + * @offset: offset of this clock to the monotonic base + */ +struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; + unsigned int index; + clockid_t clockid; + seqcount_raw_spinlock_t seq; + struct hrtimer *running; + struct timerqueue_head active; + ktime_t (*get_time)(void); + ktime_t offset; +} __hrtimer_clock_base_align; + +enum hrtimer_base_type { + HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, + HRTIMER_MAX_CLOCK_BASES, +}; + +/** + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @online: CPU is online from an hrtimers point of view + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. + */ +struct hrtimer_cpu_base { + raw_spinlock_t lock; + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1, + online : 1; +#ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; +#endif + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; +} ____cacheline_aligned; + + #endif diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h new file mode 100644 index 000000000000..ad66a3081735 --- /dev/null +++ b/include/linux/hrtimer_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HRTIMER_TYPES_H +#define _LINUX_HRTIMER_TYPES_H + +#include <linux/types.h> +#include <linux/timerqueue_types.h> + +struct hrtimer_clock_base; + +/* + * Return values for the callback function + */ +enum hrtimer_restart { + HRTIMER_NORESTART, /* Timer is not restarted */ + HRTIMER_RESTART, /* Timer must be restarted */ +}; + +/** + * struct hrtimer - the basic hrtimer structure + * @node: timerqueue node, which also manages node.expires, + * the absolute expiry time in the hrtimers internal + * representation. The time is related to the clock on + * which the timer is based. Is setup by adding + * slack to the _softexpires value. For non range timers + * identical to _softexpires. + * @_softexpires: the absolute earliest expiry time of the hrtimer. + * The time which was given as expiry time when the timer + * was armed. + * @function: timer expiry callback function + * @base: pointer to the timer base (per cpu and per clock) + * @state: state information (See bit values above) + * @is_rel: Set if the timer was armed relative + * @is_soft: Set if hrtimer will be expired in soft interrupt context. + * @is_hard: Set if hrtimer will be expired in hard interrupt context + * even on RT. + * + * The hrtimer structure must be initialized by hrtimer_init() + */ +struct hrtimer { + struct timerqueue_node node; + ktime_t _softexpires; + enum hrtimer_restart (*function)(struct hrtimer *); + struct hrtimer_clock_base *base; + u8 state; + u8 is_rel; + u8 is_soft; + u8 is_hard; +}; + +#endif /* _LINUX_HRTIMER_TYPES_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e718dbe928ba..de0c89105076 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -67,6 +67,26 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) +/* + * Mask of all large folio orders supported for anonymous THP; all orders up to + * and including PMD_ORDER, except order-0 (which is not "huge") and order-1 + * (which is a limitation of the THP implementation). + */ +#define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) + +/* + * Mask of all large folio orders supported for file THP. + */ +#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) + +/* + * Mask of all large folio orders supported for THP. + */ +#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) + +#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) @@ -77,45 +97,105 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) extern unsigned long transparent_hugepage_flags; +extern unsigned long huge_anon_orders_always; +extern unsigned long huge_anon_orders_madvise; +extern unsigned long huge_anon_orders_inherit; + +static inline bool hugepage_global_enabled(void) +{ + return transparent_hugepage_flags & + ((1<<TRANSPARENT_HUGEPAGE_FLAG) | + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)); +} + +static inline bool hugepage_global_always(void) +{ + return transparent_hugepage_flags & + (1<<TRANSPARENT_HUGEPAGE_FLAG); +} + +static inline bool hugepage_flags_enabled(void) +{ + /* + * We cover both the anon and the file-backed case here; we must return + * true if globally enabled, even when all anon sizes are set to never. + * So we don't need to look at huge_anon_orders_inherit. + */ + return hugepage_global_enabled() || + huge_anon_orders_always || + huge_anon_orders_madvise; +} + +static inline int highest_order(unsigned long orders) +{ + return fls_long(orders) - 1; +} -#define hugepage_flags_enabled() \ - (transparent_hugepage_flags & \ - ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \ - (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))) -#define hugepage_flags_always() \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_FLAG)) +static inline int next_order(unsigned long *orders, int prev) +{ + *orders &= ~BIT(prev); + return highest_order(*orders); +} /* * Do the below checks: * - For file vma, check if the linear page offset of vma is - * HPAGE_PMD_NR aligned within the file. The hugepage is - * guaranteed to be hugepage-aligned within the file, but we must - * check that the PMD-aligned addresses in the VMA map to - * PMD-aligned offsets within the file, else the hugepage will - * not be PMD-mappable. - * - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE + * order-aligned within the file. The hugepage is + * guaranteed to be order-aligned within the file, but we must + * check that the order-aligned addresses in the VMA map to + * order-aligned offsets within the file, else the hugepage will + * not be mappable. + * - For all vmas, check if the haddr is in an aligned hugepage * area. */ -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long addr) +static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, + unsigned long addr, int order) { + unsigned long hpage_size = PAGE_SIZE << order; unsigned long haddr; /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR)) + hpage_size >> PAGE_SHIFT)) return false; } - haddr = addr & HPAGE_PMD_MASK; + haddr = ALIGN_DOWN(addr, hpage_size); - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end) return false; return true; } +/* + * Filter the bitfield of input orders to the ones suitable for use in the vma. + * See thp_vma_suitable_order(). + * All orders that pass the checks are returned as a bitfield. + */ +static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, + unsigned long addr, unsigned long orders) +{ + int order; + + /* + * Iterate over orders, highest to lowest, removing orders that don't + * meet alignment requirements from the set. Exit loop at first order + * that meets requirements, since all lower orders must also meet + * requirements. + */ + + order = highest_order(orders); + + while (orders) { + if (thp_vma_suitable_order(vma, addr, order)) + break; + order = next_order(&orders, order); + } + + return orders; +} + static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; @@ -126,12 +206,55 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) inode = vma->vm_file->f_inode; return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && - (vma->vm_flags & VM_EXEC) && !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, - bool smaps, bool in_pf, bool enforce_sysfs); +unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders); + +/** + * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma + * @vma: the vm area to check + * @vm_flags: use these vm_flags instead of vma->vm_flags + * @smaps: whether answer will be used for smaps file + * @in_pf: whether answer will be used by page fault handler + * @enforce_sysfs: whether sysfs config should be taken into account + * @orders: bitfield of all orders to consider + * + * Calculates the intersection of the requested hugepage orders and the allowed + * hugepage orders for the provided vma. Permitted orders are encoded as a set + * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3 + * corresponds to order-3, etc). Order-0 is never considered a hugepage order. + * + * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage + * orders are allowed. + */ +static inline +unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders) +{ + /* Optimization to check if required orders are enabled early. */ + if (enforce_sysfs && vma_is_anonymous(vma)) { + unsigned long mask = READ_ONCE(huge_anon_orders_always); + + if (vm_flags & VM_HUGEPAGE) + mask |= READ_ONCE(huge_anon_orders_madvise); + if (hugepage_global_always() || + ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) + mask |= READ_ONCE(huge_anon_orders_inherit); + + orders &= mask; + if (!orders) + return 0; + } + + return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, + enforce_sysfs, orders); +} #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -140,14 +263,13 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -void prep_transhuge_page(struct page *page); -void free_transhuge_page(struct page *page); - +void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); -int split_huge_page_to_list(struct page *page, struct list_head *list); +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list(page, NULL); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio); @@ -269,20 +391,27 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) return false; } -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long addr) +static inline bool thp_vma_suitable_order(struct vm_area_struct *vma, + unsigned long addr, int order) { return false; } -static inline bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs) +static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, + unsigned long addr, unsigned long orders) { - return false; + return 0; +} + +static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders) +{ + return 0; } -static inline void prep_transhuge_page(struct page *page) {} +static inline void folio_prep_large_rmappable(struct folio *folio) {} #define transparent_hugepage_flags 0UL @@ -294,7 +423,8 @@ can_split_folio(struct folio *folio, int *pextra_pins) return false; } static inline int -split_huge_page_to_list(struct page *page, struct list_head *list) +split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) { return 0; } @@ -391,17 +521,20 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline int split_folio_to_list(struct folio *folio, - struct list_head *list) +static inline int split_folio_to_list_to_order(struct folio *folio, + struct list_head *list, int new_order) { - return split_huge_page_to_list(&folio->page, list); + return split_huge_page_to_list_to_order(&folio->page, list, new_order); } -static inline int split_folio(struct folio *folio) +static inline int split_folio_to_order(struct folio *folio, int new_order) { - return split_folio_to_list(folio, NULL); + return split_folio_to_list_to_order(folio, NULL, new_order); } +#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) +#define split_folio(f) split_folio_to_order(f, 0) + /* * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to * limitations in the implementation like arm64 MTE can override this to diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ca3c8e10f24a..77b30a8c6076 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -26,9 +26,11 @@ typedef struct { unsigned long pd; } hugepd_t; #define __hugepd(x) ((hugepd_t) { (x) }) #endif +void free_huge_folio(struct folio *folio); + #ifdef CONFIG_HUGETLB_PAGE -#include <linux/mempolicy.h> +#include <linux/pagemap.h> #include <linux/shm.h> #include <asm/tlbflush.h> @@ -58,6 +60,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -131,14 +134,12 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); -long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, unsigned long *, unsigned long *, - long, unsigned int, int *); + unsigned long address, unsigned int flags, + unsigned int *page_mask); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); @@ -167,7 +168,6 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void folio_putback_active_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); -void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); @@ -178,7 +178,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); extern int sysctl_hugetlb_shm_group; -extern struct list_head huge_boot_pages; +extern struct list_head huge_boot_pages[MAX_NUMNODES]; /* arch callbacks */ @@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -261,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long cp_flags); bool is_hugetlb_entry_migration(pte_t pte); +bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -297,19 +317,23 @@ static inline void adjust_range_if_pmd_sharing_possible( { } -static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) { - BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ } -static inline long follow_hugetlb_page(struct mm_struct *mm, - struct vm_area_struct *vma, struct page **pages, - unsigned long *position, unsigned long *nr_pages, - long i, unsigned int flags, int *nonblocking) +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) { - BUG(); - return 0; +} + +static inline struct page *hugetlb_follow_page_mask( + struct vm_area_struct *vma, unsigned long address, unsigned int flags, + unsigned int *page_mask) +{ + BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ } static inline int copy_hugetlb_page_range(struct mm_struct *dst, @@ -450,7 +474,7 @@ static inline long hugetlb_change_protection( return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) @@ -521,7 +545,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } struct hugetlbfs_inode_info { - struct shared_policy policy; struct inode vfs_inode; unsigned int seals; }; @@ -725,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); -struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, - unsigned long address); int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -808,7 +829,7 @@ static inline unsigned huge_page_shift(struct hstate *h) static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) > MAX_ORDER; + return huge_page_order(h) > MAX_PAGE_ORDER; } static inline unsigned int pages_per_huge_page(const struct hstate *h) @@ -821,6 +842,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) return huge_page_size(h) / 512; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return filemap_lock_folio(mapping, idx << huge_page_order(h)); +} + #include <asm/hugetlb.h> #ifndef is_hugepage_only_range @@ -851,11 +878,6 @@ static inline struct hstate *folio_hstate(struct folio *folio) return size_to_hstate(folio_size(folio)); } -static inline struct hstate *page_hstate(struct page *page) -{ - return folio_hstate(page_folio(page)); -} - static inline unsigned hstate_index_to_shift(unsigned index) { return hstates[index].order + PAGE_SHIFT; @@ -998,7 +1020,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1007,6 +1031,11 @@ void hugetlb_register_node(struct node *node); void hugetlb_unregister_node(struct node *node); #endif +/* + * Check if a given raw @page in a hugepage is HWPOISON. + */ +bool is_raw_hwpoison_page_in_hugepage(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; @@ -1015,6 +1044,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio return NULL; } +static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, + struct address_space *mapping, pgoff_t idx) +{ + return NULL; +} + static inline int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { @@ -1035,13 +1070,6 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, return NULL; } -static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, - struct vm_area_struct *vma, - unsigned long address) -{ - return NULL; -} - static inline int __alloc_bootmem_huge_page(struct hstate *h) { return 0; @@ -1067,11 +1095,6 @@ static inline struct hstate *folio_hstate(struct folio *folio) return NULL; } -static inline struct hstate *page_hstate(struct page *page) -{ - return NULL; -} - static inline struct hstate *size_to_hstate(unsigned long size) { return NULL; @@ -1187,7 +1210,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } @@ -1245,6 +1268,8 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } +bool __vma_private_lock(struct vm_area_struct *vma); + /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 3d82d91f49ac..e5d64b8b59c2 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -22,13 +22,6 @@ struct resv_map; struct file_region; #ifdef CONFIG_CGROUP_HUGETLB -/* - * Minimum page order trackable by hugetlb cgroup. - * At least 3 pages are necessary for all the tracking information. - * The second tail page contains all of the hugetlb-specific fields. - */ -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE) - enum hugetlb_memory_event { HUGETLB_MAX, HUGETLB_NR_MEMORY_EVENTS, @@ -68,8 +61,6 @@ static inline struct hugetlb_cgroup * __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return NULL; if (rsvd) return folio->_hugetlb_cgroup_rsvd; else @@ -91,8 +82,6 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); - if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) - return; if (rsvd) folio->_hugetlb_cgroup_rsvd = h_cg; else diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7fbb45911273..db199d653dd1 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int arch_reserve_bp_slot(struct perf_event *bp); -void arch_release_bp_slot(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8a3115516a1b..136e9842120e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); +extern long hwrng_yield(struct hwrng *rng); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 8cd6a6b33593..edf96f249eb5 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -141,6 +141,7 @@ enum hwmon_in_attributes { hwmon_in_rated_min, hwmon_in_rated_max, hwmon_in_beep, + hwmon_in_fault, }; #define HWMON_I_ENABLE BIT(hwmon_in_enable) @@ -162,6 +163,7 @@ enum hwmon_in_attributes { #define HWMON_I_RATED_MIN BIT(hwmon_in_rated_min) #define HWMON_I_RATED_MAX BIT(hwmon_in_rated_max) #define HWMON_I_BEEP BIT(hwmon_in_beep) +#define HWMON_I_FAULT BIT(hwmon_in_fault) enum hwmon_curr_attributes { hwmon_curr_enable, @@ -293,6 +295,8 @@ enum hwmon_humidity_attributes { hwmon_humidity_fault, hwmon_humidity_rated_min, hwmon_humidity_rated_max, + hwmon_humidity_min_alarm, + hwmon_humidity_max_alarm, }; #define HWMON_H_ENABLE BIT(hwmon_humidity_enable) @@ -306,6 +310,8 @@ enum hwmon_humidity_attributes { #define HWMON_H_FAULT BIT(hwmon_humidity_fault) #define HWMON_H_RATED_MIN BIT(hwmon_humidity_rated_min) #define HWMON_H_RATED_MAX BIT(hwmon_humidity_rated_max) +#define HWMON_H_MIN_ALARM BIT(hwmon_humidity_min_alarm) +#define HWMON_H_MAX_ALARM BIT(hwmon_humidity_max_alarm) enum hwmon_fan_attributes { hwmon_fan_enable, @@ -425,12 +431,12 @@ struct hwmon_channel_info { const u32 *config; }; -#define HWMON_CHANNEL_INFO(stype, ...) \ - (&(struct hwmon_channel_info) { \ - .type = hwmon_##stype, \ - .config = (u32 []) { \ - __VA_ARGS__, 0 \ - } \ +#define HWMON_CHANNEL_INFO(stype, ...) \ + (&(const struct hwmon_channel_info) { \ + .type = hwmon_##stype, \ + .config = (const u32 []) { \ + __VA_ARGS__, 0 \ + } \ }) /** diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3ac3974b3c78..96ceb4095425 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { @@ -348,7 +368,7 @@ struct vmtransfer_page_packet_header { u8 sender_owns_set; u8 reserved; u32 range_cnt; - struct vmtransfer_page_range ranges[1]; + struct vmtransfer_page_range ranges[]; } __packed; struct vmgpadl_packet_header { @@ -665,8 +685,8 @@ struct vmbus_channel_initiate_contact { u64 interrupt_page; struct { u8 msg_sint; - u8 padding1[3]; - u32 padding2; + u8 msg_vtl; + u8 reserved[6]; }; }; u64 monitor_page1; @@ -812,6 +832,7 @@ struct vmbus_gpadl { u32 gpadl_handle; u32 size; void *buffer; + bool decrypted; }; struct vmbus_channel { diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h new file mode 100644 index 000000000000..4d5da161c225 --- /dev/null +++ b/include/linux/i2c-atr.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * I2C Address Translator + * + * Copyright (c) 2019,2022 Luca Ceresoli <luca@lucaceresoli.net> + * Copyright (c) 2022,2023 Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> + * + * Based on i2c-mux.h + */ + +#ifndef _LINUX_I2C_ATR_H +#define _LINUX_I2C_ATR_H + +#include <linux/i2c.h> +#include <linux/types.h> + +struct device; +struct fwnode_handle; +struct i2c_atr; + +/** + * struct i2c_atr_ops - Callbacks from ATR to the device driver. + * @attach_client: Notify the driver of a new device connected on a child + * bus, with the alias assigned to it. The driver must + * configure the hardware to use the alias. + * @detach_client: Notify the driver of a device getting disconnected. The + * driver must configure the hardware to stop using the + * alias. + * + * All these functions return 0 on success, a negative error code otherwise. + */ +struct i2c_atr_ops { + int (*attach_client)(struct i2c_atr *atr, u32 chan_id, + const struct i2c_client *client, u16 alias); + void (*detach_client)(struct i2c_atr *atr, u32 chan_id, + const struct i2c_client *client); +}; + +/** + * i2c_atr_new() - Allocate and initialize an I2C ATR helper. + * @parent: The parent (upstream) adapter + * @dev: The device acting as an ATR + * @ops: Driver-specific callbacks + * @max_adapters: Maximum number of child adapters + * + * The new ATR helper is connected to the parent adapter but has no child + * adapters. Call i2c_atr_add_adapter() to add some. + * + * Call i2c_atr_delete() to remove. + * + * Return: pointer to the new ATR helper object, or ERR_PTR + */ +struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, + const struct i2c_atr_ops *ops, int max_adapters); + +/** + * i2c_atr_delete - Delete an I2C ATR helper. + * @atr: I2C ATR helper to be deleted. + * + * Precondition: all the adapters added with i2c_atr_add_adapter() must be + * removed by calling i2c_atr_del_adapter(). + */ +void i2c_atr_delete(struct i2c_atr *atr); + +/** + * i2c_atr_add_adapter - Create a child ("downstream") I2C bus. + * @atr: The I2C ATR + * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is + * passed to the callbacks in `struct i2c_atr_ops`. + * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL + * to use the i2c-atr device as the parent. + * @bus_handle: The fwnode handle that points to the adapter's i2c + * peripherals, or NULL. + * + * After calling this function a new i2c bus will appear. Adding and removing + * devices on the downstream bus will result in calls to the + * &i2c_atr_ops->attach_client and &i2c_atr_ops->detach_client callbacks for the + * driver to assign an alias to the device. + * + * The adapter's fwnode is set to @bus_handle, or if @bus_handle is NULL the + * function looks for a child node whose 'reg' property matches the chan_id + * under the i2c-atr device's 'i2c-atr' node. + * + * Call i2c_atr_del_adapter() to remove the adapter. + * + * Return: 0 on success, a negative error code otherwise. + */ +int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id, + struct device *adapter_parent, + struct fwnode_handle *bus_handle); + +/** + * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by + * i2c_atr_add_adapter(). If no I2C bus has been added + * this function is a no-op. + * @atr: The I2C ATR + * @chan_id: Index of the adapter to be removed (0 .. max_adapters-1) + */ +void i2c_atr_del_adapter(struct i2c_atr *atr, u32 chan_id); + +/** + * i2c_atr_set_driver_data - Set private driver data to the i2c-atr instance. + * @atr: The I2C ATR + * @data: Pointer to the data to store + */ +void i2c_atr_set_driver_data(struct i2c_atr *atr, void *data); + +/** + * i2c_atr_get_driver_data - Get the stored drive data. + * @atr: The I2C ATR + * + * Return: Pointer to the stored data + */ +void *i2c_atr_get_driver_data(struct i2c_atr *atr); + +#endif /* _LINUX_I2C_ATR_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3430cc2b05a6..5e6cd43a6dbd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -23,9 +23,9 @@ #include <linux/swab.h> /* for swab16 */ #include <uapi/linux/i2c.h> -extern struct bus_type i2c_bus_type; -extern struct device_type i2c_adapter_type; -extern struct device_type i2c_client_type; +extern const struct bus_type i2c_bus_type; +extern const struct device_type i2c_adapter_type; +extern const struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ @@ -237,7 +237,6 @@ enum i2c_driver_flags { * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,16 +271,8 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; - union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client); - /* - * Legacy callback that was part of a conversion of .probe(). - * Today it has the same semantic as .probe(). Don't use for new - * code. - */ - int (*probe_new)(struct i2c_client *client); - }; + int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); @@ -755,6 +746,8 @@ struct i2c_adapter { struct irq_domain *host_notify_domain; struct regulator *bus_regulator; + + struct dentry *debugfs; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) @@ -859,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ -#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) @@ -939,7 +931,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { - return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); + return (msg->addr << 1) | (msg->flags & I2C_M_RD); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 90fa83464f00..e119f11948ef 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -54,6 +54,7 @@ enum i3c_hdr_mode { * struct i3c_priv_xfer - I3C SDR private transfer * @rnw: encodes the transfer direction. true for a read, false for a write * @len: transfer length in bytes of the transfer + * @actual_len: actual length in bytes are transferred by the controller * @data: input/output buffer * @data.in: input buffer. Must point to a DMA-able buffer * @data.out: output buffer. Must point to a DMA-able buffer @@ -62,6 +63,7 @@ enum i3c_hdr_mode { struct i3c_priv_xfer { u8 rnw; u16 len; + u16 actual_len; union { void *in; const void *out; @@ -96,7 +98,7 @@ enum i3c_dcr { /** * struct i3c_device_info - I3C device information - * @pid: Provisional ID + * @pid: Provisioned ID * @bcr: Bus Characteristic Register * @dcr: Device Characteristic Register * @static_addr: static/I2C address diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..0ca27dd86956 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -24,6 +24,12 @@ struct i2c_client; +/* notifier actions. notifier call data is the struct i3c_bus */ +enum { + I3C_NOTIFY_BUS_ADD, + I3C_NOTIFY_BUS_REMOVE, +}; + struct i3c_master_controller; struct i3c_bus; struct i3c_device; @@ -70,7 +76,6 @@ struct i2c_dev_boardinfo { /** * struct i2c_dev_desc - I2C device descriptor * @common: common part of the I2C device descriptor - * @boardinfo: pointer to the boardinfo attached to this I2C device * @dev: I2C device object registered to the I2C framework * @addr: I2C device address * @lvr: LVR (Legacy Virtual Register) needed by the I3C core to know about @@ -129,6 +134,7 @@ struct i3c_ibi_slot { * rejected by the master * @num_slots: number of IBI slots reserved for this device * @enabled: reflect the IBI status + * @wq: workqueue used to execute IBI handlers. * @handler: IBI handler specified at i3c_device_request_ibi() call time. This * handler will be called from the controller workqueue, and as such * is allowed to sleep (though it is recommended to process the IBI @@ -151,6 +157,7 @@ struct i3c_device_ibi_info { unsigned int max_payload_len; unsigned int num_slots; unsigned int enabled; + struct workqueue_struct *wq; void (*handler)(struct i3c_device *dev, const struct i3c_ibi_payload *payload); }; @@ -166,7 +173,7 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) - * @pid: I3C Provisional ID exposed by the device. This is a unique identifier + * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address * @of_node: optional DT node in case the device has been described in the DT @@ -426,6 +433,8 @@ struct i3c_bus { * for a future IBI * This method is mandatory only if ->request_ibi is not * NULL. + * @enable_hotjoin: enable hot join event detect. + * @disable_hotjoin: disable hot join event detect. */ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -452,6 +461,8 @@ struct i3c_master_controller_ops { int (*disable_ibi)(struct i3c_dev_desc *dev); void (*recycle_ibi_slot)(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); + int (*enable_hotjoin)(struct i3c_master_controller *master); + int (*disable_hotjoin)(struct i3c_master_controller *master); }; /** @@ -465,11 +476,12 @@ struct i3c_master_controller_ops { * @ops: master operations. See &struct i3c_master_controller_ops * @secondary: true if the master is a secondary master * @init_done: true when the bus initialization is done + * @hotjoin: true if the master support hotjoin * @boardinfo.i3c: list of I3C boardinfo objects * @boardinfo.i2c: list of I2C boardinfo objects * @boardinfo: board-level information attached to devices connected on the bus * @bus: I3C bus exposed by this master - * @wq: workqueue used to execute IBI handlers. Can also be used by master + * @wq: workqueue which can be used by master * drivers if they need to postpone operations that need to take place * in a thread context. Typical examples are Hot Join processing which * requires taking the bus lock in maintenance, which in turn, can only @@ -487,6 +499,7 @@ struct i3c_master_controller { const struct i3c_master_controller_ops *ops; unsigned int secondary : 1; unsigned int init_done : 1; + unsigned int hotjoin: 1; struct { struct list_head i3c; struct list_head i2c; @@ -543,6 +556,8 @@ int i3c_master_register(struct i3c_master_controller *master, const struct i3c_master_controller_ops *ops, bool secondary); void i3c_master_unregister(struct i3c_master_controller *master); +int i3c_master_enable_hotjoin(struct i3c_master_controller *master); +int i3c_master_disable_hotjoin(struct i3c_master_controller *master); /** * i3c_dev_get_master_data() - get master private data attached to an I3C @@ -652,4 +667,9 @@ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev); +void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data), + void *data); +int i3c_register_notifier(struct notifier_block *nb); +int i3c_unregister_notifier(struct notifier_block *nb); + #endif /* I3C_MASTER_H */ diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index db0f4fcfdaf4..e3b3b0fa2a8f 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -85,12 +85,10 @@ extern void icmpv6_param_prob_reason(struct sk_buff *skb, struct flowi6; struct in6_addr; -extern void icmpv6_flow_init(struct sock *sk, - struct flowi6 *fl6, - u8 type, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - int oif); + +void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int oif); static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9..da5f5fa4a3a6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -200,7 +200,7 @@ static inline void idr_preload_end(void) */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** @@ -224,10 +224,12 @@ static inline void idr_preload_end(void) * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. + * After normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ - tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \ + ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4b998090898e..3385a2cc5b09 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -172,11 +172,11 @@ #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) -/* PV1 Layout 11ah 9.8.3.1 */ +/* PV1 Layout IEEE 802.11-2020 9.8.3.1 */ #define IEEE80211_PV1_FCTL_VERS 0x0003 #define IEEE80211_PV1_FCTL_FTYPE 0x001c #define IEEE80211_PV1_FCTL_STYPE 0x00e0 -#define IEEE80211_PV1_FCTL_TODS 0x0100 +#define IEEE80211_PV1_FCTL_FROMDS 0x0100 #define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200 #define IEEE80211_PV1_FCTL_PM 0x0400 #define IEEE80211_PV1_FCTL_MOREDATA 0x0800 @@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } +static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) +{ + return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); +} + static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; @@ -307,6 +312,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 +/* UL-bandwidth within common_info of trigger frame */ +#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 +#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 +#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 +#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 +#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; @@ -801,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } +static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) +{ + return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; @@ -836,9 +853,14 @@ enum ieee80211_preq_target_flags { }; /** - * struct ieee80211_quiet_ie + * struct ieee80211_quiet_ie - Quiet element + * @count: Quiet Count + * @period: Quiet Period + * @duration: Quiet Duration + * @offset: Quiet Offset * - * This structure refers to "Quiet information element" + * This structure represents the payload of the "Quiet element" as + * described in IEEE Std 802.11-2020 section 9.4.2.22. */ struct ieee80211_quiet_ie { u8 count; @@ -848,9 +870,15 @@ struct ieee80211_quiet_ie { } __packed; /** - * struct ieee80211_msrment_ie + * struct ieee80211_msrment_ie - Measurement element + * @token: Measurement Token + * @mode: Measurement Report Mode + * @type: Measurement Type + * @request: Measurement Request or Measurement Report * - * This structure refers to "Measurement Request/Report information element" + * This structure represents the payload of both the "Measurement + * Request element" and the "Measurement Report element" as described + * in IEEE Std 802.11-2020 sections 9.4.2.20 and 9.4.2.21. */ struct ieee80211_msrment_ie { u8 token; @@ -860,9 +888,14 @@ struct ieee80211_msrment_ie { } __packed; /** - * struct ieee80211_channel_sw_ie + * struct ieee80211_channel_sw_ie - Channel Switch Announcement element + * @mode: Channel Switch Mode + * @new_ch_num: New Channel Number + * @count: Channel Switch Count * - * This structure refers to "Channel Switch Announcement information element" + * This structure represents the payload of the "Channel Switch + * Announcement element" as described in IEEE Std 802.11-2020 section + * 9.4.2.18. */ struct ieee80211_channel_sw_ie { u8 mode; @@ -871,9 +904,14 @@ struct ieee80211_channel_sw_ie { } __packed; /** - * struct ieee80211_ext_chansw_ie + * struct ieee80211_ext_chansw_ie - Extended Channel Switch Announcement element + * @mode: Channel Switch Mode + * @new_operating_class: New Operating Class + * @new_ch_num: New Channel Number + * @count: Channel Switch Count * - * This structure represents the "Extended Channel Switch Announcement element" + * This structure represents the "Extended Channel Switch Announcement + * element" as described in IEEE Std 802.11-2020 section 9.4.2.52. */ struct ieee80211_ext_chansw_ie { u8 mode; @@ -894,8 +932,14 @@ struct ieee80211_sec_chan_offs_ie { /** * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE + * @mesh_ttl: Time To Live + * @mesh_flags: Flags + * @mesh_reason: Reason Code + * @mesh_pre_value: Precedence Value * - * This structure represents the "Mesh Channel Switch Paramters element" + * This structure represents the payload of the "Mesh Channel Switch + * Parameters element" as described in IEEE Std 802.11-2020 section + * 9.4.2.102. */ struct ieee80211_mesh_chansw_params_ie { u8 mesh_ttl; @@ -906,6 +950,13 @@ struct ieee80211_mesh_chansw_params_ie { /** * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE + * @new_channel_width: New Channel Width + * @new_center_freq_seg0: New Channel Center Frequency Segment 0 + * @new_center_freq_seg1: New Channel Center Frequency Segment 1 + * + * This structure represents the payload of the "Wide Bandwidth + * Channel Switch element" as described in IEEE Std 802.11-2020 + * section 9.4.2.160. */ struct ieee80211_wide_bw_chansw_ie { u8 new_channel_width; @@ -913,22 +964,42 @@ struct ieee80211_wide_bw_chansw_ie { } __packed; /** - * struct ieee80211_tim + * struct ieee80211_tim_ie - Traffic Indication Map information element + * @dtim_count: DTIM Count + * @dtim_period: DTIM Period + * @bitmap_ctrl: Bitmap Control + * @required_octet: "Syntatic sugar" to force the struct size to the + * minimum valid size when carried in a non-S1G PPDU + * @virtual_map: Partial Virtual Bitmap * - * This structure refers to "Traffic Indication Map information element" + * This structure represents the payload of the "TIM element" as + * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this + * definition is only applicable when the element is carried in a + * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap + * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; - /* variable size: 1 - 251 bytes */ - u8 virtual_map[1]; + union { + u8 required_octet; + DECLARE_FLEX_ARRAY(u8, virtual_map); + }; } __packed; /** - * struct ieee80211_meshconf_ie + * struct ieee80211_meshconf_ie - Mesh Configuration element + * @meshconf_psel: Active Path Selection Protocol Identifier + * @meshconf_pmetric: Active Path Selection Metric Identifier + * @meshconf_congest: Congestion Control Mode Identifier + * @meshconf_synch: Synchronization Method Identifier + * @meshconf_auth: Authentication Protocol Identifier + * @meshconf_form: Mesh Formation Info + * @meshconf_cap: Mesh Capability (see &enum mesh_config_capab_flags) * - * This structure refers to "Mesh Configuration information element" + * This structure represents the payload of the "Mesh Configuration + * element" as described in IEEE Std 802.11-2020 section 9.4.2.97. */ struct ieee80211_meshconf_ie { u8 meshconf_psel; @@ -950,6 +1021,9 @@ struct ieee80211_meshconf_ie { * is ongoing * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has * neighbors in deep sleep mode + * + * Enumerates the "Mesh Capability" as described in IEEE Std + * 802.11-2020 section 9.4.2.97.7. */ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS = 0x01, @@ -960,7 +1034,7 @@ enum mesh_config_capab_flags { #define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1 -/** +/* * mesh channel switch parameters element's flag indicator * */ @@ -969,9 +1043,17 @@ enum mesh_config_capab_flags { #define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2) /** - * struct ieee80211_rann_ie + * struct ieee80211_rann_ie - RANN (root announcement) element + * @rann_flags: Flags + * @rann_hopcount: Hop Count + * @rann_ttl: Element TTL + * @rann_addr: Root Mesh STA Address + * @rann_seq: HWMP Sequence Number + * @rann_interval: Interval + * @rann_metric: Metric * - * This structure refers to "Root Announcement information element" + * This structure represents the payload of the "RANN element" as + * described in IEEE Std 802.11-2020 section 9.4.2.111. */ struct ieee80211_rann_ie { u8 rann_flags; @@ -993,7 +1075,7 @@ enum ieee80211_ht_chanwidth_values { }; /** - * enum ieee80211_opmode_bits - VHT operating mode field bits + * enum ieee80211_vht_opmode_bits - VHT operating mode field bits * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width @@ -1042,9 +1124,12 @@ enum ieee80211_s1g_chanwidth { #define WLAN_USER_POSITION_LEN 16 /** - * struct ieee80211_tpc_report_ie + * struct ieee80211_tpc_report_ie - TPC Report element + * @tx_power: Transmit Power + * @link_margin: Link Margin * - * This structure refers to "TPC Report element" + * This structure represents the payload of the "TPC Report element" as + * described in IEEE Std 802.11-2020 section 9.4.2.16. */ struct ieee80211_tpc_report_ie { u8 tx_power; @@ -1062,9 +1147,14 @@ struct ieee80211_addba_ext_ie { } __packed; /** - * struct ieee80211_s1g_bcn_compat_ie + * struct ieee80211_s1g_bcn_compat_ie - S1G Beacon Compatibility element + * @compat_info: Compatibility Information + * @beacon_int: Beacon Interval + * @tsf_completion: TSF Completion * - * S1G Beacon Compatibility element + * This structure represents the payload of the "S1G Beacon + * Compatibility element" as described in IEEE Std 802.11-2020 section + * 9.4.2.196. */ struct ieee80211_s1g_bcn_compat_ie { __le16 compat_info; @@ -1073,9 +1163,15 @@ struct ieee80211_s1g_bcn_compat_ie { } __packed; /** - * struct ieee80211_s1g_oper_ie + * struct ieee80211_s1g_oper_ie - S1G Operation element + * @ch_width: S1G Operation Information Channel Width + * @oper_class: S1G Operation Information Operating Class + * @primary_ch: S1G Operation Information Primary Channel Number + * @oper_ch: S1G Operation Information Channel Center Frequency + * @basic_mcs_nss: Basic S1G-MCS and NSS Set * - * S1G Operation element + * This structure represents the payload of the "S1G Operation + * element" as described in IEEE Std 802.11-2020 section 9.4.2.212. */ struct ieee80211_s1g_oper_ie { u8 ch_width; @@ -1086,9 +1182,13 @@ struct ieee80211_s1g_oper_ie { } __packed; /** - * struct ieee80211_aid_response_ie + * struct ieee80211_aid_response_ie - AID Response element + * @aid: AID/Group AID + * @switch_count: AID Switch Count + * @response_int: AID Response Interval * - * AID Response element + * This structure represents the payload of the "AID Response element" + * as described in IEEE Std 802.11-2020 section 9.4.2.194. */ struct ieee80211_aid_response_ie { __le16 aid; @@ -1163,6 +1263,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -1340,6 +1464,20 @@ struct ieee80211_mgmt { u8 max_tod_error; u8 max_toa_error; } __packed wnm_timing_msr; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ttlm_req; + struct { + u8 action_code; + u8 dialog_token; + u8 status_code; + u8 variable[]; + } __packed ttlm_res; + struct { + u8 action_code; + } __packed ttlm_tear_down; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -1489,7 +1627,7 @@ struct ieee80211_tdls_data { /* * Peer-to-Peer IE attribute related definitions. */ -/** +/* * enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute. */ enum ieee80211_p2p_attr_id { @@ -1539,11 +1677,17 @@ struct ieee80211_p2p_noa_attr { #define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F /** - * struct ieee80211_bar - HT Block Ack Request + * struct ieee80211_bar - Block Ack Request frame format + * @frame_control: Frame Control + * @duration: Duration + * @ra: RA + * @ta: TA + * @control: BAR Control + * @start_seq_num: Starting Sequence Number (see Figure 9-37) * - * This structure refers to "HT BlockAckReq" as - * described in 802.11n draft section 7.2.1.7.1 - */ + * This structure represents the "BlockAckReq frame format" + * as described in IEEE Std 802.11-2020 section 9.3.1.7. +*/ struct ieee80211_bar { __le16 frame_control; __le16 duration; @@ -1563,13 +1707,17 @@ struct ieee80211_bar { #define IEEE80211_HT_MCS_MASK_LEN 10 /** - * struct ieee80211_mcs_info - MCS information + * struct ieee80211_mcs_info - Supported MCS Set field * @rx_mask: RX mask * @rx_highest: highest supported RX rate. If set represents * the highest supported RX data rate in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. * @tx_params: TX parameters + * @reserved: Reserved bits + * + * This structure represents the "Supported MCS Set field" as + * described in IEEE Std 802.11-2020 section 9.4.2.55.4. */ struct ieee80211_mcs_info { u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; @@ -1588,6 +1736,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream @@ -1600,10 +1750,16 @@ struct ieee80211_mcs_info { (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) /** - * struct ieee80211_ht_cap - HT capabilities + * struct ieee80211_ht_cap - HT capabilities element + * @cap_info: HT Capability Information + * @ampdu_params_info: A-MPDU Parameters + * @mcs: Supported MCS Set + * @extended_ht_cap_info: HT Extended Capabilities + * @tx_BF_cap_info: Transmit Beamforming Capabilities + * @antenna_selection_info: ASEL Capability * - * This structure is the "HT capabilities element" as - * described in 802.11n D5.0 7.3.2.57 + * This structure represents the payload of the "HT Capabilities + * element" as described in IEEE Std 802.11-2020 section 9.4.2.55. */ struct ieee80211_ht_cap { __le16 cap_info; @@ -1691,9 +1847,14 @@ enum ieee80211_min_mpdu_spacing { /** * struct ieee80211_ht_operation - HT operation IE + * @primary_chan: Primary Channel + * @ht_param: HT Operation Information parameters + * @operation_mode: HT Operation Information operation mode + * @stbc_param: HT Operation Information STBC params + * @basic_set: Basic HT-MCS Set * - * This structure is the "HT operation element" as - * described in 802.11n-2009 7.3.2.57 + * This structure represents the payload of the "HT Operation + * element" as described in IEEE Std 802.11-2020 section 9.4.2.56. */ struct ieee80211_ht_operation { u8 primary_chan; @@ -1862,9 +2023,12 @@ struct ieee80211_vht_operation { /** * struct ieee80211_he_cap_elem - HE capabilities element + * @mac_cap_info: HE MAC Capabilities Information + * @phy_cap_info: HE PHY Capabilities Information * - * This structure is the "HE capabilities element" fixed fields as - * described in P802.11ax_D4.0 section 9.4.2.242.2 and 9.4.2.242.3 + * This structure represents the fixed fields of the payload of the + * "HE capabilities element" as described in IEEE Std 802.11ax-2021 + * sections 9.4.2.248.2 and 9.4.2.248.3. */ struct ieee80211_he_cap_elem { u8 mac_cap_info[6]; @@ -1923,35 +2087,45 @@ struct ieee80211_he_mcs_nss_supp { } __packed; /** - * struct ieee80211_he_operation - HE capabilities element + * struct ieee80211_he_operation - HE Operation element + * @he_oper_params: HE Operation Parameters + BSS Color Information + * @he_mcs_nss_set: Basic HE-MCS And NSS Set + * @optional: Optional fields VHT Operation Information, Max Co-Hosted + * BSSID Indicator, and 6 GHz Operation Information * - * This structure is the "HE operation element" fields as - * described in P802.11ax_D4.0 section 9.4.2.243 + * This structure represents the payload of the "HE Operation + * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.249. */ struct ieee80211_he_operation { __le32 he_oper_params; __le16 he_mcs_nss_set; - /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */ u8 optional[]; } __packed; /** - * struct ieee80211_he_spr - HE spatial reuse element + * struct ieee80211_he_spr - Spatial Reuse Parameter Set element + * @he_sr_control: SR Control + * @optional: Optional fields Non-SRG OBSS PD Max Offset, SRG OBSS PD + * Min Offset, SRG OBSS PD Max Offset, SRG BSS Color + * Bitmap, and SRG Partial BSSID Bitmap * - * This structure is the "HE spatial reuse element" element as - * described in P802.11ax_D4.0 section 9.4.2.241 + * This structure represents the payload of the "Spatial Reuse + * Parameter Set element" as described in IEEE Std 802.11ax-2021 + * section 9.4.2.252. */ struct ieee80211_he_spr { u8 he_sr_control; - /* Optional 0 to 19 bytes: depends on @he_sr_control */ u8 optional[]; } __packed; /** * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field + * @aifsn: ACI/AIFSN + * @ecw_min_max: ECWmin/ECWmax + * @mu_edca_timer: MU EDCA Timer * - * This structure is the "MU AC Parameter Record" fields as - * described in P802.11ax_D4.0 section 9.4.2.245 + * This structure represents the "MU AC Parameter Record" as described + * in IEEE Std 802.11ax-2021 section 9.4.2.251, Figure 9-788p. */ struct ieee80211_he_mu_edca_param_ac_rec { u8 aifsn; @@ -1961,9 +2135,14 @@ struct ieee80211_he_mu_edca_param_ac_rec { /** * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element + * @mu_qos_info: QoS Info + * @ac_be: MU AC_BE Parameter Record + * @ac_bk: MU AC_BK Parameter Record + * @ac_vi: MU AC_VI Parameter Record + * @ac_vo: MU AC_VO Parameter Record * - * This structure is the "MU EDCA Parameter Set element" fields as - * described in P802.11ax_D4.0 section 9.4.2.245 + * This structure represents the payload of the "MU EDCA Parameter Set + * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.251. */ struct ieee80211_mu_edca_param_set { u8 mu_qos_info; @@ -2177,9 +2356,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, * enum ieee80211_ap_reg_power - regulatory power for a Access Point * * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode - * @IEEE80211_REG_LPI: Indoor Access Point - * @IEEE80211_REG_SP: Standard power Access Point - * @IEEE80211_REG_VLP: Very low power Access Point + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal * @IEEE80211_REG_AP_POWER_MAX: maximum value */ @@ -2565,9 +2744,10 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 #define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 +#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 /** - * ieee80211_he_6ghz_oper - HE 6 GHz operation Information field + * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field * @primary: primary channel * @control: control flags * @ccfs0: channel center frequency segment 0 @@ -2614,9 +2794,13 @@ enum ieee80211_tx_power_intrpt_type { }; /** - * struct ieee80211_tx_pwr_env + * struct ieee80211_tx_pwr_env - Transmit Power Envelope + * @tx_power_info: Transmit Power Information field + * @tx_power: Maximum Transmit Power field * - * This structure represents the "Transmit Power Envelope element" + * This structure represents the payload of the "Transmit Power + * Envelope element" as described in IEEE Std 802.11ax-2021 section + * 9.4.2.161 */ struct ieee80211_tx_pwr_env { u8 tx_power_info; @@ -2671,12 +2855,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { - const u8 *ret = (const void *)&he_oper->optional; + const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; + ret = (const void *)&he_oper->optional; + he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) @@ -2874,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 @@ -3013,6 +3202,44 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +/* must validate ieee80211_eht_oper_size_ok() first */ +static inline u16 +ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) +{ + const struct ieee80211_eht_operation_info *info = + (const void *)eht_oper->optional; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) + return 0; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) + return 0; + + return get_unaligned_le16(info->optional); +} + +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3173,6 +3400,8 @@ enum ieee80211_statuscode { WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, WLAN_STATUS_SAE_PK = 127, + WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, + WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, }; @@ -3470,6 +3699,8 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ @@ -3496,6 +3727,7 @@ enum ieee80211_category { WLAN_CATEGORY_UNPROT_DMG = 20, WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_S1G = 22, + WLAN_CATEGORY_PROTECTED_EHT = 37, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -3559,6 +3791,13 @@ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; +/* Protected EHT action codes */ +enum ieee80211_protected_eht_actioncode { + WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, + WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, + WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -4237,6 +4476,36 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, } /** + * ieee80211_is_protected_dual_of_public_action - check if skb contains a + * protected dual of public action management frame + * @skb: the skb containing the frame, length will be checked + * + * Return: true if the skb contains a protected dual of public action + * management frame, false otherwise. + */ +static inline bool +ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) +{ + u8 action; + + if (!ieee80211_is_public_action((void *)skb->data, skb->len) || + skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + return false; + + action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + + return action != WLAN_PUB_ACTION_20_40_BSS_COEX && + action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && + action != WLAN_PUB_ACTION_MSMT_PILOT && + action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && + action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && + action != WLAN_PUB_ACTION_FTM_REQUEST && + action != WLAN_PUB_ACTION_FTM_RESPONSE && + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; +} + +/** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @hdr: the frame @@ -4307,12 +4576,11 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, /** * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) * @skb: the skb containing the frame, length will not be checked - * @hdr_size: the size of the ieee80211_hdr that starts at skb->data * * This function assumes the frame is a data frame, and that the network header * is in the correct place. */ -static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) +static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { @@ -4478,7 +4746,7 @@ static inline bool for_each_element_completed(const struct element *element, return (const u8 *)element == (const u8 *)data + datalen; } -/** +/* * RSNX Capabilities: * bits 0-3: Field length (n-1) */ @@ -4630,6 +4898,10 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f #define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 @@ -4693,18 +4965,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) } /** + * ieee80211_mle_get_link_id - returns the link ID + * @data: the basic multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the BSS link ID can't be found, -1 will be returned + */ +static inline int ieee80211_mle_get_link_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* common points now at the beginning of ieee80211_mle_basic_common_info */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) + return -1; + + return *common; +} + +/** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count - * @mle: the basic multi link element + * @data: pointer to the basic multi link element * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). * * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), 0 will be returned. + * for it is clear), -1 will be returned. */ -static inline u8 -ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) +static inline int +ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { + const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; @@ -4712,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) - return 0; + return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; @@ -4721,7 +5018,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) } /** - * ieee80211_mle_get_eml_sync_delay - returns the medium sync delay + * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay * @data: pointer to the multi link EHT IE * * The element is assumed to be of the correct type (BASIC) and big enough, @@ -4782,6 +5079,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) } /** + * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. + * @data: pointer to the multi link EHT IE + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD capabilities and operations field is not present, 0 will be + * returned. + */ +static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + + return get_unaligned_le16(common); +} + +/** + * ieee80211_mle_get_mld_id - returns the MLD ID + * @data: pointer to the multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD ID is not present, 0 will be returned. + */ +static inline u8 ieee80211_mle_get_mld_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + + return *common; +} + +/** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element @@ -5006,6 +5378,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + + if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 1ed52441972f..10a1e81434cb 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -53,6 +53,10 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_NONE: case ARPHRD_RAWIP: case ARPHRD_PIMREG: + /* PPP adds its l2 header automatically in ppp_start_xmit(). + * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init(). + */ + case ARPHRD_PPP: return false; default: return true; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 8de6b6e67829..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -162,8 +162,8 @@ struct team_option { bool per_port; unsigned int array_size; /* != 0 means the option is array */ enum team_option_type type; - int (*init)(struct team *team, struct team_option_inst_info *info); - int (*getter)(struct team *team, struct team_gsetter_ctx *ctx); + void (*init)(struct team *team, struct team_option_inst_info *info); + void (*getter)(struct team *team, struct team_gsetter_ctx *ctx); int (*setter)(struct team *team, struct team_gsetter_ctx *ctx); }; @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 2a7660843444..043d442994b0 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -27,44 +27,54 @@ struct tun_xdp_hdr { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); + static inline bool tun_is_xdp_frame(void *ptr) { - return (unsigned long)ptr & TUN_XDP_FLAG; + return (unsigned long)ptr & TUN_XDP_FLAG; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { - return (void *)((unsigned long)xdp | TUN_XDP_FLAG); + return (void *)((unsigned long)xdp | TUN_XDP_FLAG); } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { - return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); } + void tun_ptr_free(void *ptr); #else #include <linux/err.h> #include <linux/errno.h> struct file; struct socket; + static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } + static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } + static inline bool tun_is_xdp_frame(void *ptr) { return false; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { return NULL; } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { return NULL; } + static inline void tun_ptr_free(void *ptr) { } diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 6ba71957851e..c1645c86eed9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -408,7 +408,7 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. @@ -437,7 +437,7 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. @@ -457,7 +457,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * Returns a VLAN tagged skb. This might change skb->head. * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. @@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) - return -EINVAL; + return -ENODATA; *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; @@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, return 0; } else { *vlan_tci = 0; - return -EINVAL; + return -ENODATA; } } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index ebf4349a53af..5171231f70a8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,7 +39,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[]; + __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include <linux/iio/iio.h> + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; diff --git a/include/linux/iio/adc/adi-axi-adc.h b/include/linux/iio/adc/adi-axi-adc.h deleted file mode 100644 index 52620e5b8052..000000000000 --- a/include/linux/iio/adc/adi-axi-adc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Analog Devices Generic AXI ADC IP core driver/library - * Link: https://wiki.analog.com/resources/fpga/docs/axi_adc_ip - * - * Copyright 2012-2020 Analog Devices Inc. - */ -#ifndef __ADI_AXI_ADC_H__ -#define __ADI_AXI_ADC_H__ - -struct device; -struct iio_chan_spec; - -/** - * struct adi_axi_adc_chip_info - Chip specific information - * @name Chip name - * @id Chip ID (usually product ID) - * @channels Channel specifications of type @struct iio_chan_spec - * @num_channels Number of @channels - * @scale_table Supported scales by the chip; tuples of 2 ints - * @num_scales Number of scales in the table - * @max_rate Maximum sampling rate supported by the device - */ -struct adi_axi_adc_chip_info { - const char *name; - unsigned int id; - - const struct iio_chan_spec *channels; - unsigned int num_channels; - - const unsigned int (*scale_table)[2]; - int num_scales; - - unsigned long max_rate; -}; - -/** - * struct adi_axi_adc_conv - data of the ADC attached to the AXI ADC - * @chip_info chip info details for the client ADC - * @preenable_setup op to run in the client before enabling the AXI ADC - * @reg_access IIO debugfs_reg_access hook for the client ADC - * @read_raw IIO read_raw hook for the client ADC - * @write_raw IIO write_raw hook for the client ADC - */ -struct adi_axi_adc_conv { - const struct adi_axi_adc_chip_info *chip_info; - - int (*preenable_setup)(struct adi_axi_adc_conv *conv); - int (*reg_access)(struct adi_axi_adc_conv *conv, unsigned int reg, - unsigned int writeval, unsigned int *readval); - int (*read_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int *val, int *val2, long mask); - int (*write_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int val, int val2, long mask); -}; - -struct adi_axi_adc_conv *devm_adi_axi_adc_conv_register(struct device *dev, - size_t sizeof_priv); - -void *adi_axi_adc_conv_priv(struct adi_axi_adc_conv *conv); - -#endif diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h new file mode 100644 index 000000000000..a6d79381866e --- /dev/null +++ b/include/linux/iio/backend.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _IIO_BACKEND_H_ +#define _IIO_BACKEND_H_ + +#include <linux/types.h> + +struct fwnode_handle; +struct iio_backend; +struct device; +struct iio_dev; + +enum iio_backend_data_type { + IIO_BACKEND_TWOS_COMPLEMENT, + IIO_BACKEND_OFFSET_BINARY, + IIO_BACKEND_DATA_TYPE_MAX +}; + +/** + * struct iio_backend_data_fmt - Backend data format + * @type: Data type. + * @sign_extend: Bool to tell if the data is sign extended. + * @enable: Enable/Disable the data format module. If disabled, + * not formatting will happen. + */ +struct iio_backend_data_fmt { + enum iio_backend_data_type type; + bool sign_extend; + bool enable; +}; + +/** + * struct iio_backend_ops - operations structure for an iio_backend + * @enable: Enable backend. + * @disable: Disable backend. + * @chan_enable: Enable one channel. + * @chan_disable: Disable one channel. + * @data_format_set: Configure the data format for a specific channel. + * @request_buffer: Request an IIO buffer. + * @free_buffer: Free an IIO buffer. + **/ +struct iio_backend_ops { + int (*enable)(struct iio_backend *back); + void (*disable)(struct iio_backend *back); + int (*chan_enable)(struct iio_backend *back, unsigned int chan); + int (*chan_disable)(struct iio_backend *back, unsigned int chan); + int (*data_format_set)(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); + struct iio_buffer *(*request_buffer)(struct iio_backend *back, + struct iio_dev *indio_dev); + void (*free_buffer)(struct iio_backend *back, + struct iio_buffer *buffer); +}; + +int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); +int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); +int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); +int devm_iio_backend_request_buffer(struct device *dev, + struct iio_backend *back, + struct iio_dev *indio_dev); + +void *iio_backend_get_priv(const struct iio_backend *conv); +struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend * +__devm_iio_backend_get_from_fwnode_lookup(struct device *dev, + struct fwnode_handle *fwnode); + +int devm_iio_backend_register(struct device *dev, + const struct iio_backend_ops *ops, void *priv); + +#endif diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 6564bdcdac66..18d3702fa95d 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -19,14 +19,12 @@ struct device; /** * enum iio_block_state - State of a struct iio_dma_buffer_block - * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued * @IIO_BLOCK_STATE_QUEUED: Block is on the incoming queue * @IIO_BLOCK_STATE_ACTIVE: Block is currently being processed by the DMA * @IIO_BLOCK_STATE_DONE: Block is on the outgoing queue * @IIO_BLOCK_STATE_DEAD: Block has been marked as to be freed */ enum iio_block_state { - IIO_BLOCK_STATE_DEQUEUED, IIO_BLOCK_STATE_QUEUED, IIO_BLOCK_STATE_ACTIVE, IIO_BLOCK_STATE_DONE, @@ -73,12 +71,15 @@ struct iio_dma_buffer_block { * @active_block: Block being used in read() * @pos: Read offset in the active block * @block_size: Size of each block + * @next_dequeue: index of next block that will be dequeued */ struct iio_dma_buffer_queue_fileio { struct iio_dma_buffer_block *blocks[2]; struct iio_dma_buffer_block *active_block; size_t pos; size_t block_size; + + unsigned int next_dequeue; }; /** @@ -93,7 +94,6 @@ struct iio_dma_buffer_queue_fileio { * list and typically also a list of active blocks in the part that handles * the DMA controller * @incoming: List of buffers on the incoming queue - * @outgoing: List of buffers on the outgoing queue * @active: Whether the buffer is currently active * @fileio: FileIO state */ @@ -105,7 +105,6 @@ struct iio_dma_buffer_queue { struct mutex lock; spinlock_t list_lock; struct list_head incoming; - struct list_head outgoing; bool active; diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 5c355be89814..cbb8ba957fad 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -10,6 +10,9 @@ struct iio_dev; struct device; +struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, + const char *channel); +void iio_dmaengine_buffer_free(struct iio_buffer *buffer); int devm_iio_dmaengine_buffer_setup(struct device *dev, struct iio_dev *indio_dev, const char *channel); diff --git a/include/linux/iio/common/inv_sensors_timestamp.h b/include/linux/iio/common/inv_sensors_timestamp.h new file mode 100644 index 000000000000..a47d304d1ba7 --- /dev/null +++ b/include/linux/iio/common/inv_sensors_timestamp.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Invensense, Inc. + */ + +#ifndef INV_SENSORS_TIMESTAMP_H_ +#define INV_SENSORS_TIMESTAMP_H_ + +/** + * struct inv_sensors_timestamp_chip - chip internal properties + * @clock_period: internal clock period in ns + * @jitter: acceptable jitter in per-mille + * @init_period: chip initial period at reset in ns + */ +struct inv_sensors_timestamp_chip { + uint32_t clock_period; + uint32_t jitter; + uint32_t init_period; +}; + +/** + * struct inv_sensors_timestamp_interval - timestamps interval + * @lo: interval lower bound + * @up: interval upper bound + */ +struct inv_sensors_timestamp_interval { + int64_t lo; + int64_t up; +}; + +/** + * struct inv_sensors_timestamp_acc - accumulator for computing an estimation + * @val: current estimation of the value, the mean of all values + * @idx: current index of the next free place in values table + * @values: table of all measured values, use for computing the mean + */ +struct inv_sensors_timestamp_acc { + uint32_t val; + size_t idx; + uint32_t values[32]; +}; + +/** + * struct inv_sensors_timestamp - timestamp management states + * @chip: chip internal characteristics + * @min_period: minimal acceptable clock period + * @max_period: maximal acceptable clock period + * @it: interrupts interval timestamps + * @timestamp: store last timestamp for computing next data timestamp + * @mult: current internal period multiplier + * @new_mult: new set internal period multiplier (not yet effective) + * @period: measured current period of the sensor + * @chip_period: accumulator for computing internal chip period + */ +struct inv_sensors_timestamp { + struct inv_sensors_timestamp_chip chip; + uint32_t min_period; + uint32_t max_period; + struct inv_sensors_timestamp_interval it; + int64_t timestamp; + uint32_t mult; + uint32_t new_mult; + uint32_t period; + struct inv_sensors_timestamp_acc chip_period; +}; + +void inv_sensors_timestamp_init(struct inv_sensors_timestamp *ts, + const struct inv_sensors_timestamp_chip *chip); + +int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, + uint32_t period, bool fifo); + +void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts, + uint32_t fifo_period, size_t fifo_nb, + size_t sensor_nb, int64_t timestamp); + +static inline int64_t inv_sensors_timestamp_pop(struct inv_sensors_timestamp *ts) +{ + ts->timestamp += ts->period; + return ts->timestamp; +} + +void inv_sensors_timestamp_apply_odr(struct inv_sensors_timestamp *ts, + uint32_t fifo_period, size_t fifo_nb, + unsigned int fifo_no); + +static inline void inv_sensors_timestamp_reset(struct inv_sensors_timestamp *ts) +{ + const struct inv_sensors_timestamp_interval interval_init = {0LL, 0LL}; + + ts->it = interval_init; + ts->timestamp = 0; +} + +#endif diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6802596b017c..e9910b41d48e 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -201,8 +201,9 @@ struct iio_dev * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_channel_raw(struct iio_channel *chan, int *val); @@ -212,8 +213,9 @@ int iio_read_channel_raw(struct iio_channel *chan, * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. * * In opposit to the normal iio_read_channel_raw this function * returns the average of multiple reads. @@ -281,8 +283,9 @@ int iio_read_channel_attribute(struct iio_channel *chan, int *val, * @chan: The channel being queried. * @val: Value being written. * - * Note raw writes to iio channels are in dac counts and hence - * scale will need to be applied if standard units required. + * Note that for raw writes to iio channels, if the value provided is + * in standard units, the affect of the scale and offset must be removed + * as (value / scale) - offset. */ int iio_write_channel_raw(struct iio_channel *chan, int val); @@ -292,12 +295,25 @@ int iio_write_channel_raw(struct iio_channel *chan, int val); * @chan: The channel being queried. * @val: Value read back. * - * Note raw reads from iio channels are in adc counts and hence - * scale will need to be applied if standard units are required. + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_max_channel_raw(struct iio_channel *chan, int *val); /** + * iio_read_min_channel_raw() - read minimum available raw value from a given + * channel, i.e. the minimum possible value. + * @chan: The channel being queried. + * @val: Value read back. + * + * Note, if standard units are required, raw reads from iio channels + * need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. + */ +int iio_read_min_channel_raw(struct iio_channel *chan, int *val); + +/** * iio_read_avail_channel_raw() - read available raw values from a given channel * @chan: The channel being queried. * @vals: Available values read back. @@ -308,8 +324,9 @@ int iio_read_max_channel_raw(struct iio_channel *chan, int *val); * For ranges, three vals are always returned; min, step and max. * For lists, all the possible values are enumerated. * - * Note raw available values from iio channels are in adc counts and - * hence scale will need to be applied if standard units are required. + * Note, if standard units are required, raw available values from iio + * channels need the offset (default 0) and scale (default 1) to be applied + * as (raw + offset) * scale. */ int iio_read_avail_channel_raw(struct iio_channel *chan, const int **vals, int *length); diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 202e55b0a28b..e370a7bb3300 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/cdev.h> +#include <linux/cleanup.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ @@ -427,18 +428,14 @@ struct iio_trigger; /* forward declaration */ * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. + * @read_event_label: function to request label name for a specified label, + * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. * @update_scan_mode: function to configure device and scan buffer when * channels have changed * @debugfs_reg_access: function to read or write register value of device - * @of_xlate: function pointer to obtain channel specifier index. - * When #iio-cells is greater than '0', the driver could - * provide a custom of_xlate function that reads the - * *args* and returns the appropriate index in registered - * IIO channels array. * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index. - * Functionally the same as @of_xlate. * @hwfifo_set_watermark: function pointer to set the current hardware * fifo watermark level; see hwfifo_* entries in * Documentation/ABI/testing/sysfs-bus-iio for details on @@ -511,6 +508,12 @@ struct iio_info { enum iio_event_direction dir, enum iio_event_info info, int val, int val2); + int (*read_event_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + enum iio_event_type type, + enum iio_event_direction dir, + char *label); + int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, @@ -556,7 +559,9 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @available_scan_masks: [DRIVER] optional array of allowed bitmasks + * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the + * array in order of preference, the most preferred + * masks first. * @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers @@ -634,10 +639,37 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); + +/* + * This autocleanup logic is normally used via + * iio_device_claim_direct_scoped(). + */ +DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), + iio_device_release_direct_mode(_T)) + +DEFINE_GUARD_COND(iio_claim_direct, _try, ({ + struct iio_dev *dev; + int d = iio_device_claim_direct_mode(_T); + + if (d < 0) + dev = NULL; + else + dev = _T; + dev; + })) + +/** + * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. + * @fail: What to do on failure to claim device. + * @iio_dev: Pointer to the IIO devices structure + */ +#define iio_device_claim_direct_scoped(fail, iio_dev) \ + scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) + int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); -extern struct bus_type iio_bus_type; +extern const struct bus_type iio_bus_type; /** * iio_device_put() - reference counted deallocation of struct device diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> +#include <linux/iio/iio.h> #include <linux/iio/types.h> #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index eff1e6b2595c..0f7fe7b522e3 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt); struct iio_sw_device *iio_sw_device_create(const char *, const char *); void iio_sw_device_destroy(struct iio_sw_device *); -int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt); -void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); - static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 47de2443e984..bc77f88df303 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt); struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *); void iio_sw_trigger_destroy(struct iio_sw_trigger *); -int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt); -void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); - static inline void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 82faa98c719a..d89982c98368 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -19,6 +19,8 @@ enum iio_event_info { IIO_EV_INFO_TIMEOUT, IIO_EV_INFO_RESET_TIMEOUT, IIO_EV_INFO_TAP2_MIN_DELAY, + IIO_EV_INFO_RUNNING_PERIOD, + IIO_EV_INFO_RUNNING_COUNT, }; #define IIO_VAL_INT 1 @@ -66,6 +68,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_THERMOCOUPLE_TYPE, IIO_CHAN_INFO_CALIBAMBIENT, IIO_CHAN_INFO_ZEROPOINT, + IIO_CHAN_INFO_TROUGH, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/linux/ima.h b/include/linux/ima.h index 86b57757c7b1..0bae61a15b60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -16,23 +16,6 @@ struct linux_binprm; #ifdef CONFIG_IMA extern enum hash_algo ima_get_current_hash_algo(void); -extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); -extern void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode); -extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); -extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); -extern int ima_load_data(enum kernel_load_data_id id, bool contents); -extern int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, char *description); -extern int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents); -extern int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id); -extern void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); @@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void) return HASH_ALGO__LAST; } -static inline int ima_bprm_check(struct linux_binprm *bprm) -{ - return 0; -} - -static inline int ima_file_check(struct file *file, int mask) -{ - return 0; -} - -static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode) -{ -} - -static inline void ima_file_free(struct file *file) -{ - return; -} - -static inline int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ - return 0; -} - -static inline int ima_file_mprotect(struct vm_area_struct *vma, - unsigned long prot) -{ - return 0; -} - -static inline int ima_load_data(enum kernel_load_data_id id, bool contents) -{ - return 0; -} - -static inline int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, - char *description) -{ - return 0; -} - -static inline int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents) -{ - return 0; -} - -static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id) -{ - return 0; -} - -static inline void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) { return -EOPNOTSUPP; @@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif -#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS -extern void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, size_t plen, - unsigned long flags, bool create); -#else -static inline void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, - size_t plen, - unsigned long flags, - bool create) {} -#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */ - #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -extern void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry); -extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, - const void *xattr_value, size_t xattr_value_len); -extern int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return ima_inode_set_acl(idmap, dentry, acl_name, NULL); -} -extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) { return 0; } - -static inline void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - -static inline int ima_inode_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return 0; -} - -static inline int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - - return 0; -} - -static inline int ima_inode_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index c1c76a70a6ce..35227d47cfc9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -2,7 +2,7 @@ #ifndef _LINUX_INDIRECT_CALL_WRAPPER_H #define _LINUX_INDIRECT_CALL_WRAPPER_H -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin @@ -11,7 +11,7 @@ * @__VA_ARGS__: arguments for @f * * Avoid retpoline overhead for known builtin, checking @f vs each of them and - * eventually invoking directly the builtin function. The functions are check + * eventually invoking directly the builtin function. The functions are checked * in the given order. Fallback to the indirect call. */ #define INDIRECT_CALL_1(f, f1, ...) \ diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 84abb30a3fbb..a9033696b0aa 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -8,6 +8,7 @@ struct inet_hashinfo; struct inet_diag_handler { + struct module *owner; void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ddb27fc0ee8c..cb5280e6cc21 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -53,13 +53,15 @@ struct in_device { }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) +#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) +#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) -static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; - return in_dev->cnf.data[index]; + return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, @@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, { index--; set_bit(index, in_dev->cnf.state); - in_dev->cnf.data[index] = val; + WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) @@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ - (IPV4_DEVCONF_ALL(net, attr) || \ + (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ + (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/include/linux/init.h b/include/linux/init.h index 266c3e1640d4..58cef4c2e59a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -89,9 +89,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" @@ -171,17 +168,20 @@ extern initcall_entry_t __initcall_end[]; extern struct file_system_type rootfs_fs_type; -#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) extern bool rodata_enabled; -#endif -#ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void); -#endif extern void (*late_time_init)(void); extern bool initcall_debug; +#ifdef MODULE +extern struct module __this_module; +#define THIS_MODULE (&__this_module) +#else +#define THIS_MODULE ((struct module *)0) +#endif + #endif #ifndef MODULE diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 40fc5813cf93..bccb3f1f6262 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -37,13 +37,6 @@ extern struct cred init_cred; #define INIT_TASK_COMM "swapper" -/* Attach to the init_task data structure for proper alignment */ -#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -#define __init_task_data __section(".data..init_task") -#else -#define __init_task_data /**/ -#endif - /* Attach to the thread_info data structure for proper alignment */ #define __init_thread_info __section(".data..init_thread_info") diff --git a/include/linux/input.h b/include/linux/input.h index c22ac465254b..89a0be6ee0e2 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -275,7 +275,8 @@ struct input_handle; * it may not sleep * @events: event sequence handler. This method is being called by * input core with interrupts disabled and dev->event_lock - * spinlock held and so it may not sleep + * spinlock held and so it may not sleep. The method must return + * number of events passed to it. * @filter: similar to @event; separates normal event handlers from * "filters". * @match: called after comparing device's id with handler's id_table @@ -312,8 +313,8 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); - void (*events)(struct input_handle *handle, - const struct input_value *vals, unsigned int count); + unsigned int (*events)(struct input_handle *handle, + struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include <asm/linkage.h> + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ diff --git a/include/linux/int_log.h b/include/linux/int_log.h new file mode 100644 index 000000000000..0a6f58c38b61 --- /dev/null +++ b/include/linux/int_log.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Provides fixed-point logarithm operations. + * + * Copyright (C) 2006 Christoph Pfister (christophpfister@gmail.com) + */ + +#ifndef __LINUX_INT_LOG_H +#define __LINUX_INT_LOG_H + +#include <linux/types.h> + +/** + * intlog2 - computes log2 of a value; the result is shifted left by 24 bits + * + * @value: The value (must be != 0) + * + * to use rational values you can use the following method: + * + * intlog2(value) = intlog2(value * 2^x) - x * 2^24 + * + * Some usecase examples: + * + * intlog2(8) will give 3 << 24 = 3 * 2^24 + * + * intlog2(9) will give 3 << 24 + ... = 3.16... * 2^24 + * + * intlog2(1.5) = intlog2(3) - 2^24 = 0.584... * 2^24 + * + * + * return: log2(value) * 2^24 + */ +extern unsigned int intlog2(u32 value); + +/** + * intlog10 - computes log10 of a value; the result is shifted left by 24 bits + * + * @value: The value (must be != 0) + * + * to use rational values you can use the following method: + * + * intlog10(value) = intlog10(value * 10^x) - x * 2^24 + * + * An usecase example: + * + * intlog10(1000) will give 3 << 24 = 3 * 2^24 + * + * due to the implementation intlog10(1000) might be not exactly 3 * 2^24 + * + * look at intlog2 for similar examples + * + * return: log10(value) * 2^24 + */ +extern unsigned int intlog10(u32 value); + +#endif diff --git a/include/linux/integrity.h b/include/linux/integrity.h index 2ea0f2f65ab6..459b79683783 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -19,40 +19,13 @@ enum integrity_status { INTEGRITY_UNKNOWN, }; -/* List of EVM protected security xattrs */ #ifdef CONFIG_INTEGRITY -extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode); -extern void integrity_inode_free(struct inode *inode); extern void __init integrity_load_keys(void); #else -static inline struct integrity_iint_cache * - integrity_inode_get(struct inode *inode) -{ - return NULL; -} - -static inline void integrity_inode_free(struct inode *inode) -{ - return; -} - static inline void integrity_load_keys(void) { } #endif /* CONFIG_INTEGRITY */ -#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS - -extern int integrity_kernel_module_request(char *kmod_name); - -#else - -static inline int integrity_kernel_module_request(char *kmod_name) -{ - return 0; -} - -#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ - #endif /* _LINUX_INTEGRITY_H */ diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index f45f13304add..771622650247 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -94,6 +94,9 @@ int ishtp_cl_link(struct ishtp_cl *cl); void ishtp_cl_unlink(struct ishtp_cl *cl); int ishtp_cl_disconnect(struct ishtp_cl *cl); int ishtp_cl_connect(struct ishtp_cl *cl); +int ishtp_cl_establish_connection(struct ishtp_cl *cl, const guid_t *uuid, + int tx_size, int rx_size, bool reset); +void ishtp_cl_destroy_connection(struct ishtp_cl *cl, bool reset); int ishtp_cl_send(struct ishtp_cl *cl, uint8_t *buf, size_t length); int ishtp_cl_flush_queues(struct ishtp_cl *cl); int ishtp_cl_io_rb_recycle(struct ishtp_cl_rb *rb); diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 33f21bd85dbf..f3196f82fd8a 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -178,6 +178,12 @@ struct rapl_package { struct rapl_if_priv *priv; }; +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +void rapl_remove_package_cpuslocked(struct rapl_package *rp); + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); void rapl_remove_package(struct rapl_package *rp); diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h index f422612c28d6..8ff8eabb4a98 100644 --- a/include/linux/intel_tcc.h +++ b/include/linux/intel_tcc.h @@ -13,6 +13,6 @@ int intel_tcc_get_tjmax(int cpu); int intel_tcc_get_offset(int cpu); int intel_tcc_set_offset(int cpu, int offset); -int intel_tcc_get_temp(int cpu, bool pkg); +int intel_tcc_get_temp(int cpu, int *temp, bool pkg); #endif /* __INTEL_TCC_H__ */ diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index f505788c05da..a3529b962be6 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -6,6 +6,25 @@ #ifndef _INTEL_TPMI_H_ #define _INTEL_TPMI_H_ +#include <linux/bitfield.h> + +#define TPMI_VERSION_INVALID 0xff +#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) +#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) + +/* + * List of supported TMPI IDs. + * Some TMPI IDs are not used by Linux, so the numbers are not consecutive. + */ +enum intel_tpmi_id { + TPMI_ID_RAPL = 0, /* Running Average Power Limit */ + TPMI_ID_PEM = 1, /* Power and Perf excursion Monitor */ + TPMI_ID_UNCORE = 2, /* Uncore Frequency Scaling */ + TPMI_ID_SST = 5, /* Speed Select Technology */ + TPMI_CONTROL_ID = 0x80, /* Special ID for getting feature status */ + TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ +}; + /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance * @package_id: CPU Package id @@ -26,5 +45,6 @@ struct intel_tpmi_plat_info { struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); - +int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked, + bool *write_blocked); #endif diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index e6d8aca6886d..f5aef8784692 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -33,10 +33,10 @@ struct icc_node_data { */ struct icc_onecell_data { unsigned int num_nodes; - struct icc_node *nodes[]; + struct icc_node *nodes[] __counted_by(num_nodes); }; -struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, +struct icc_node *of_icc_xlate_onecell(const struct of_phandle_args *spec, void *data); /** @@ -65,8 +65,9 @@ struct icc_provider { u32 peak_bw, u32 *agg_avg, u32 *agg_peak); void (*pre_aggregate)(struct icc_node *node); int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak); - struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); - struct icc_node_data* (*xlate_extended)(struct of_phandle_args *spec, void *data); + struct icc_node* (*xlate)(const struct of_phandle_args *spec, void *data); + struct icc_node_data* (*xlate_extended)(const struct of_phandle_args *spec, + void *data); struct device *dev; int users; bool inter_set; @@ -124,7 +125,7 @@ int icc_nodes_remove(struct icc_provider *provider); void icc_provider_init(struct icc_provider *provider); int icc_provider_register(struct icc_provider *provider); void icc_provider_deregister(struct icc_provider *provider); -struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); +struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec); void icc_sync_state(struct device *dev); #else @@ -171,7 +172,7 @@ static inline int icc_provider_register(struct icc_provider *provider) static inline void icc_provider_deregister(struct icc_provider *provider) { } -static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) +static inline struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec) { return ERR_PTR(-ENOTSUPP); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..3a36e64119c8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/cpumask.h> #include <linux/irqreturn.h> #include <linux/irqnr.h> @@ -67,6 +68,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +85,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -232,6 +236,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); +DEFINE_LOCK_GUARD_1(disable_irq, int, + disable_irq(*_T->lock), enable_irq(*_T->lock)) + extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); @@ -566,11 +573,15 @@ enum * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. - * 2) rcu_report_dead() reports the final quiescent states. + * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + * + * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ + BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 1b7a44b35616..86cf1f7ae389 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -100,6 +100,30 @@ struct io_pgtable_cfg { const struct iommu_flush_ops *tlb; struct device *iommu_dev; + /** + * @alloc: Custom page allocator. + * + * Optional hook used to allocate page tables. If this function is NULL, + * @free must be NULL too. + * + * Memory returned should be zeroed and suitable for dma_map_single() and + * virt_to_phys(). + * + * Not all formats support custom page allocators. Before considering + * passing a non-NULL value, make sure the chosen page format supports + * this feature. + */ + void *(*alloc)(void *cookie, size_t size, gfp_t gfp); + + /** + * @free: Custom page de-allocator. + * + * Optional hook used to free page tables allocated with the @alloc + * hook. Must be non-NULL if @alloc is not NULL, must be NULL + * otherwise. + */ + void (*free)(void *cookie, void *pages, size_t size); + /* Low-level data specific to the table format */ union { struct { @@ -166,6 +190,10 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); }; /** @@ -238,15 +266,25 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, } /** + * enum io_pgtable_caps - IO page table backend capabilities. + */ +enum io_pgtable_caps { + /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ + IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), +}; + +/** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. * * @alloc: Allocate a set of page tables described by cfg. * @free: Free the page tables associated with iop. + * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. */ struct io_pgtable_init_fns { struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); void (*free)(struct io_pgtable *iop); + u32 caps; }; extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; diff --git a/include/linux/io.h b/include/linux/io.h index 7304f2a69960..235ba7d80a8f 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -23,12 +23,19 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot); +int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot); #else static inline int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { return 0; } +static inline int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) +{ + return 0; +} #endif /* diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index bb9c666bd584..68ed6697fece 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -6,63 +6,13 @@ #include <linux/xarray.h> #include <uapi/linux/io_uring.h> -enum io_uring_cmd_flags { - IO_URING_F_COMPLETE_DEFER = 1, - IO_URING_F_UNLOCKED = 2, - /* the request is executed from poll, it should not be freed */ - IO_URING_F_MULTISHOT = 4, - /* executed by io-wq */ - IO_URING_F_IOWQ = 8, - /* int's last bit, sign checks are usually faster than a bit test */ - IO_URING_F_NONBLOCK = INT_MIN, - - /* ctx state flags, for URING_CMD */ - IO_URING_F_SQE128 = (1 << 8), - IO_URING_F_CQE32 = (1 << 9), - IO_URING_F_IOPOLL = (1 << 10), -}; - -struct io_uring_cmd { - struct file *file; - const struct io_uring_sqe *sqe; - union { - /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); - /* used for polled completion */ - void *cookie; - }; - u32 cmd_op; - u32 flags; - u8 pdu[32]; /* available inline for free use */ -}; - -static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) -{ - return sqe->cmd; -} - #if defined(CONFIG_IO_URING) -int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, - unsigned issue_flags); -struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); -void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), - unsigned flags); -/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */ -void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)); - -static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ - __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); -} +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); +bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) { @@ -82,27 +32,6 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) -{ - return -EOPNOTSUPP; -} -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2, unsigned issue_flags) -{ -} -static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ -} -static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) -{ -} -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} static inline void io_uring_task_cancel(void) { } @@ -116,6 +45,15 @@ static inline const char *io_uring_get_opcode(u8 opcode) { return ""; } +static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +static inline bool io_is_uring_fops(struct file *file) +{ + return false; +} #endif #endif diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h new file mode 100644 index 000000000000..e453a997c060 --- /dev/null +++ b/include/linux/io_uring/cmd.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_CMD_H +#define _LINUX_IO_URING_CMD_H + +#include <uapi/linux/io_uring.h> +#include <linux/io_uring_types.h> + +/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_CANCELABLE (1U << 30) + +struct io_uring_cmd { + struct file *file; + const struct io_uring_sqe *sqe; + /* callback to defer completions to task context */ + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); + u32 cmd_op; + u32 flags; + u8 pdu[32]; /* available inline for free use */ +}; + +static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return sqe->cmd; +} + +#if defined(CONFIG_IO_URING) +int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); +void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned), + unsigned flags); + +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags); + +#else +static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd) +{ + return -EOPNOTSUPP; +} +static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, + ssize_t ret2, unsigned issue_flags) +{ +} +static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned), + unsigned flags) +{ +} +static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ +} +#endif + +/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ +static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) +{ + __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE); +} + +static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) +{ + __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); +} + +static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->task; +} + +#endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f04ce513fadb..ac333ea81d31 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -2,11 +2,43 @@ #define IO_URING_TYPES_H #include <linux/blkdev.h> +#include <linux/hashtable.h> #include <linux/task_work.h> #include <linux/bitmap.h> #include <linux/llist.h> #include <uapi/linux/io_uring.h> +enum { + /* + * A hint to not wake right away but delay until there are enough of + * tw's queued to match the number of CQEs the task is waiting for. + * + * Must not be used with requests generating more than one CQE. + * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set. + */ + IOU_F_TWQ_LAZY_WAKE = 1, +}; + +enum io_uring_cmd_flags { + IO_URING_F_COMPLETE_DEFER = 1, + IO_URING_F_UNLOCKED = 2, + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 4, + /* executed by io-wq */ + IO_URING_F_IOWQ = 8, + /* int's last bit, sign checks are usually faster than a bit test */ + IO_URING_F_NONBLOCK = INT_MIN, + + /* ctx state flags, for URING_CMD */ + IO_URING_F_SQE128 = (1 << 8), + IO_URING_F_CQE32 = (1 << 9), + IO_URING_F_IOPOLL = (1 << 10), + + /* set when uring wants to cancel a previously issued command */ + IO_URING_F_CANCEL = (1 << 11), + IO_URING_F_COMPAT = (1 << 12), +}; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -69,8 +101,8 @@ struct io_uring_task { }; struct io_uring { - u32 head ____cacheline_aligned_in_smp; - u32 tail ____cacheline_aligned_in_smp; + u32 head; + u32 tail; }; /* @@ -176,7 +208,6 @@ struct io_submit_state { unsigned short submit_nr; unsigned int cqes_count; struct blk_plug plug; - struct io_uring_cqe cqes[16]; }; struct io_ev_fd { @@ -205,25 +236,19 @@ struct io_ring_ctx { unsigned int has_evfd: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + unsigned int lockless_cq: 1; unsigned int syscall_iopoll: 1; unsigned int poll_activated: 1; unsigned int drain_disabled: 1; unsigned int compat: 1; + unsigned int iowq_limits_set : 1; - enum task_work_notify_mode notify_method; + struct task_struct *submitter_task; + struct io_rings *rings; + struct percpu_ref refs; - /* - * If IORING_SETUP_NO_MMAP is used, then the below holds - * the gup'ed pages for the two rings, and the sqes. - */ - unsigned short n_ring_pages; - unsigned short n_sqe_pages; - struct page **ring_pages; - struct page **sqe_pages; - - struct io_rings *rings; - struct task_struct *submitter_task; - struct percpu_ref refs; + enum task_work_notify_mode notify_method; + unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; /* submission data */ @@ -252,40 +277,35 @@ struct io_ring_ctx { */ struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + bool poll_multi_queue; + struct io_wq_work_list iopoll_list; + struct io_file_table file_table; + struct io_mapped_ubuf **user_bufs; unsigned nr_user_files; unsigned nr_user_bufs; - struct io_mapped_ubuf **user_bufs; struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; - struct list_head io_buffers_cache; struct io_hash_table cancel_table_locked; - struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; - } ____cacheline_aligned_in_smp; - /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; - unsigned int locked_free_nr; - - const struct cred *sq_creds; /* cred used for __io_sq_thread() */ - struct io_sq_data *sq_data; /* if using sq thread polling */ - - struct wait_queue_head sqo_sq_wait; - struct list_head sqd_list; - - unsigned long check_cq; - - unsigned int file_alloc_start; - unsigned int file_alloc_end; - - struct xarray personalities; - u32 pers_next; + /* + * Any cancelable uring_cmd is added to this list in + * ->uring_cmd() by io_uring_cmd_insert_cancelable() + */ + struct hlist_head cancelable_uring_cmd; + } ____cacheline_aligned_in_smp; struct { /* @@ -298,39 +318,62 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - struct wait_queue_head cq_wait; unsigned cq_extra; } ____cacheline_aligned_in_smp; + /* + * task_work and async notification delivery cacheline. Expected to + * regularly bounce b/w CPUs. + */ struct { - spinlock_t completion_lock; - - bool poll_multi_queue; - atomic_t cq_wait_nr; - - /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. - */ - struct io_wq_work_list iopoll_list; - struct io_hash_table cancel_table; - struct llist_head work_llist; - - struct list_head io_buffers_comp; + unsigned long check_cq; + atomic_t cq_wait_nr; + atomic_t cq_timeouts; + struct wait_queue_head cq_wait; } ____cacheline_aligned_in_smp; /* timeouts */ struct { spinlock_t timeout_lock; - atomic_t cq_timeouts; struct list_head timeout_list; struct list_head ltimeout_list; unsigned cq_last_tm_flush; } ____cacheline_aligned_in_smp; + struct io_uring_cqe completion_cqes[16]; + + spinlock_t completion_lock; + + /* IRQ completion list, under ->completion_lock */ + unsigned int locked_free_nr; + struct io_wq_work_list locked_free_list; + + struct list_head io_buffers_comp; + struct list_head cq_overflow_list; + struct io_hash_table cancel_table; + + struct hlist_head waitid_list; + +#ifdef CONFIG_FUTEX + struct hlist_head futex_list; + struct io_alloc_cache futex_cache; +#endif + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned int file_alloc_start; + unsigned int file_alloc_end; + + struct list_head io_buffers_cache; + + /* deferred free list, protected by ->uring_lock */ + struct hlist_head io_buf_list; + /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; @@ -346,11 +389,9 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; - struct list_head io_buffers_pages; + u32 pers_next; + struct xarray personalities; - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; @@ -367,13 +408,33 @@ struct io_ring_ctx { /* io-wq management, e.g. thread count */ u32 iowq_limits[2]; - bool iowq_limits_set; struct callback_head poll_wq_task_work; struct list_head defer_list; - unsigned sq_thread_idle; + +#ifdef CONFIG_NET_RX_BUSY_POLL + struct list_head napi_list; /* track busy poll napi_id */ + spinlock_t napi_lock; /* napi_list lock */ + + /* napi busy poll default timeout */ + unsigned int napi_busy_poll_to; + bool napi_prefer_busy_poll; + bool napi_enabled; + + DECLARE_HASHTABLE(napi_ht, 4); +#endif + /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + + /* + * If IORING_SETUP_NO_MMAP is used, then the below holds + * the gup'ed pages for the two rings, and the sqes. + */ + unsigned short n_ring_pages; + unsigned short n_sqe_pages; + struct page **ring_pages; + struct page **sqe_pages; }; struct io_tw_state { @@ -408,83 +469,95 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, - REQ_F_PARTIAL_IO_BIT, - REQ_F_CQE32_INIT_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, REQ_F_HASH_LOCKED_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, + REQ_F_POLL_NO_LAZY_BIT, + REQ_F_CANCEL_SEQ_BIT, + REQ_F_CAN_POLL_BIT, + REQ_F_BL_EMPTY_BIT, + REQ_F_BL_NO_RECYCLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, }; +typedef u64 __bitwise io_req_flags_t; +#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno))) + enum { /* ctx owns file */ - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT), /* drain existing IO first */ - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT), /* linked sqes */ - REQ_F_LINK = BIT(REQ_F_LINK_BIT), + REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT), /* doesn't sever on completion < 0 */ - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT), /* IOSQE_ASYNC */ - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_BUFFER_SELECT */ - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), /* fail rest of links */ - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), /* on inflight list, should be cancelled and waited on exit reliably */ - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT), /* must not punt to workers */ - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT), /* has or had linked timeout */ - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT), /* needs cleanup */ - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), /* buffer already selected */ - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT), /* caller should reissue async */ - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT), /* supports async reads/writes */ - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT), /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT), /* has creds assigned */ - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT), /* skip refcounting if not set */ - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT), /* there is a linked timeout that has to be armed */ - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT), /* ->async_data allocated */ - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT), /* don't post CQEs while failing linked requests */ - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT), /* single poll may be active */ - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), - /* request has already done partial IO */ - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), /* fast poll multishot mode */ - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), - /* ->extra1 and ->extra2 are initialised */ - REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), + REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ - REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), + /* don't use lazy poll wake for this request */ + REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), + /* cancel sequence is set and valid */ + REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), + /* file is pollable */ + REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), + /* buffer list was empty after selection of buffer */ + REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), + /* don't recycle provided buffers for this request */ + REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -545,15 +618,17 @@ struct io_kiocb { * and after selection it points to the buffer ID itself. */ u16 buf_index; - unsigned int flags; + + unsigned nr_tw; + + /* REQ_F_* flags */ + io_req_flags_t flags; struct io_cqe cqe; struct io_ring_ctx *ctx; struct task_struct *task; - struct io_rsrc_node *rsrc_node; - union { /* store used ubuf, so we can prevent reloading */ struct io_mapped_ubuf *imu; @@ -574,18 +649,14 @@ struct io_kiocb { /* cache ->apoll->events */ __poll_t apoll_events; }; + + struct io_rsrc_node *rsrc_node; + atomic_t refs; atomic_t poll_refs; struct io_task_work io_task_work; - unsigned nr_tw; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ - union { - struct hlist_node hash_node; - struct { - u64 extra1; - u64 extra2; - }; - }; + struct hlist_node hash_node; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; /* opcode allocated if it needs to store data for async defer */ @@ -595,6 +666,11 @@ struct io_kiocb { /* custom credentials, valid IFF REQ_F_CREDS is set */ const struct cred *creds; struct io_wq_work work; + + struct { + u64 extra1; + u64 extra2; + } big_cqe; }; struct io_overflow_cqe { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e2b836c2e119..6fc1c858013d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -58,7 +58,11 @@ struct vm_fault; #define IOMAP_F_DIRTY (1U << 1) #define IOMAP_F_SHARED (1U << 2) #define IOMAP_F_MERGED (1U << 3) +#ifdef CONFIG_BUFFER_HEAD #define IOMAP_F_BUFFER_HEAD (1U << 4) +#else +#define IOMAP_F_BUFFER_HEAD 0 +#endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) /* @@ -261,9 +265,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); -struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos); +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); +bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, @@ -288,22 +293,32 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ - u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ - struct bio *io_bio; /* bio being built */ - struct bio io_inline_bio; /* MUST BE LAST! */ + struct bio io_bio; /* MUST BE LAST! */ }; +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) +{ + return container_of(bio, struct iomap_ioend, io_bio); +} + struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset); + loff_t offset, unsigned len); /* * Optional, allows the file systems to perform actions just before @@ -324,6 +339,7 @@ struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; + u32 nr_folios; /* folios added to the ioend */ }; void iomap_finish_ioends(struct iomap_ioend *ioend, int error); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..2e925b5eba53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,7 +13,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/of.h> -#include <uapi/linux/iommu.h> +#include <linux/iova_bitmap.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -37,10 +37,113 @@ struct bus_type; struct device; struct iommu_domain; struct iommu_domain_ops; +struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; -struct iommu_fault_event; struct iommu_dma_cookie; +struct iommu_fault_param; + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */ +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). + * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response + * must have the same PASID value as the page request. When it is clear, + * the page response should not have a PASID. + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) + u32 flags; + u32 pasid; + u32 grpid; + u32 perm; + u64 addr; + u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + */ +struct iommu_fault { + u32 type; + struct iommu_fault_page_request prm; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { + u32 pasid; + u32 grpid; + u32 code; +}; + +struct iopf_fault { + struct iommu_fault fault; + /* node for pending lists */ + struct list_head list; +}; + +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + /* list node for iommu_fault_param::faults */ + struct list_head pending_node; + struct work_struct work; + struct iommu_domain *domain; + /* The device's fault data parameter. */ + struct iommu_fault_param *fault_param; +}; + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -48,7 +151,6 @@ struct iommu_dma_cookie; typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); -typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -64,6 +166,10 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) + +#define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested + on a stage-2 translation */ #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +187,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. + * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,15 +199,18 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) +#define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; + const struct iommu_dirty_ops *dirty_ops; + const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, - void *data); + int (*iopf_handler)(struct iopf_group *group); void *fault_data; union { struct { @@ -109,6 +220,11 @@ struct iommu_domain { struct { /* IOMMU_DOMAIN_SVA */ struct mm_struct *mm; int users; + /* + * Next iommu_domain in mm->iommu_mm->sva-domains list + * protected by iommu_sva_lock. + */ + struct list_head next; }; }; }; @@ -133,6 +249,7 @@ enum iommu_cap { * usefully support the non-strict DMA flush queue. */ IOMMU_CAP_DEFERRED_FLUSH, + IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */ }; /* These are the possible reserved region types */ @@ -196,6 +313,8 @@ enum iommu_dev_features { IOMMU_DEV_FEAT_IOPF, }; +#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ +#define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; @@ -226,16 +345,183 @@ struct iommu_iotlb_gather { }; /** + * struct iommu_dirty_bitmap - Dirty IOVA bitmap state + * @bitmap: IOVA bitmap + * @gather: Range information for a pending IOTLB flush + */ +struct iommu_dirty_bitmap { + struct iova_bitmap *bitmap; + struct iommu_iotlb_gather *gather; +}; + +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + +/** + * struct iommu_dirty_ops - domain specific dirty tracking operations + * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain + * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled + * into a bitmap, with a bit represented as a page. + * Reads the dirty PTE bits and clears it from IO + * pagetables. + */ +struct iommu_dirty_ops { + int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled); + int (*read_and_clear_dirty)(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty); +}; + +/** + * struct iommu_user_data - iommu driver specific user space data info + * @type: The data type of the user buffer + * @uptr: Pointer to the user buffer for copy_from_user() + * @len: The length of the user buffer in bytes + * + * A user space data is an uAPI that is defined in include/uapi/linux/iommufd.h + * @type, @uptr and @len should be just copied from an iommufd core uAPI struct. + */ +struct iommu_user_data { + unsigned int type; + void __user *uptr; + size_t len; +}; + +/** + * struct iommu_user_data_array - iommu driver specific user space data array + * @type: The data type of all the entries in the user buffer array + * @uptr: Pointer to the user buffer array + * @entry_len: The fixed-width length of an entry in the array, in bytes + * @entry_num: The number of total entries in the array + * + * The user buffer includes an array of requests with format defined in + * include/uapi/linux/iommufd.h + */ +struct iommu_user_data_array { + unsigned int type; + void __user *uptr; + size_t entry_len; + u32 entry_num; +}; + +/** + * __iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @dst_data. Must match with @src_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user( + void *dst_data, const struct iommu_user_data *src_data, + unsigned int data_type, size_t data_len, size_t min_len) +{ + if (src_data->type != data_type) + return -EINVAL; + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (src_data->len < min_len || data_len < src_data->len) + return -EINVAL; + return copy_struct_from_user(dst_data, data_len, src_data->uptr, + src_data->len); +} + +/** + * iommu_copy_struct_from_user - Copy iommu driver specific user space data + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_data: Pointer to a struct iommu_user_data for user space data info + * @data_type: The data type of the @kdst. Must match with @user_data->type + * @min_last: The last memember of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ + __iommu_copy_struct_from_user(kdst, user_data, data_type, \ + sizeof(*kdst), \ + offsetofend(typeof(*kdst), min_last)) + +/** + * __iommu_copy_struct_from_user_array - Copy iommu driver specific user space + * data from an iommu_user_data_array + * @dst_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @src_array: Pointer to a struct iommu_user_data_array for a user space array + * @data_type: The data type of the @dst_data. Must match with @src_array.type + * @index: Index to the location in the array to copy user data from + * @data_len: Length of current user data structure, i.e. sizeof(struct _dst) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int __iommu_copy_struct_from_user_array( + void *dst_data, const struct iommu_user_data_array *src_array, + unsigned int data_type, unsigned int index, size_t data_len, + size_t min_len) +{ + struct iommu_user_data src_data; + + if (WARN_ON(!src_array || index >= src_array->entry_num)) + return -EINVAL; + if (!src_array->entry_num) + return -EINVAL; + src_data.uptr = src_array->uptr + src_array->entry_len * index; + src_data.len = src_array->entry_len; + src_data.type = src_array->type; + + return __iommu_copy_struct_from_user(dst_data, &src_data, data_type, + data_len, min_len); +} + +/** + * iommu_copy_struct_from_user_array - Copy iommu driver specific user space + * data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * @index: Index to the location in the array to copy user data from + * @min_last: The last member of the data structure @kdst points in the + * initial version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ + min_last) \ + __iommu_copy_struct_from_user_array( \ + kdst, user_array, data_type, index, sizeof(*(kdst)), \ + offsetofend(typeof(*(kdst)), min_last)) + +/** * struct iommu_ops - iommu ops and capabilities * @capable: check capability - * @domain_alloc: allocate iommu domain + * @hw_info: report iommu hardware information. The data buffer returned by this + * op is allocated in the iommu driver and freed by the caller after + * use. The information type is one of enum iommu_hw_info_type defined + * in include/uapi/linux/iommufd.h. + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, if the @user_data is valid and the @parent + * points to a kernel-managed domain, the new domain must be + * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be + * NULL while the @user_data can be optionally provided, the + * new domain must support __IOMMU_DOMAIN_PAGING. + * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -254,32 +540,42 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. + * @blocked_domain: An always available, always attachable blocking + * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); + void *(*hw_info)(struct device *dev, u32 *length, u32 *type); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)( + struct device *dev, u32 flags, struct iommu_domain *parent, + const struct iommu_user_data *user_data); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + int (*of_xlate)(struct device *dev, const struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg); + void (*page_response)(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); @@ -287,6 +583,10 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; + struct iommu_domain *blocked_domain; + struct iommu_domain *release_domain; + struct iommu_domain *default_domain; }; /** @@ -305,15 +605,20 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. - * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue + * @cache_invalidate_user: Flush hardware cache for user space IO page table. + * The @domain must be IOMMU_DOMAIN_NESTED. The @array + * passes in the cache invalidation requests, in form + * of a driver data structure. The driver must update + * array->entry_num to report the number of handled + * invalidation requests. The driver data structure + * must be defined in include/uapi/linux/iommufd.h * @iova_to_phys: translate iova to physical address * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform @@ -327,22 +632,20 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); + int (*cache_invalidate_user)(struct iommu_domain *domain, + struct iommu_user_data_array *array); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -361,6 +664,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -368,60 +672,62 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; /** - * struct iommu_fault_event - Generic fault event - * - * Can represent recoverable faults such as a page requests or - * unrecoverable faults such as DMA or IRQ remapping faults. - * - * @fault: fault descriptor - * @list: pending fault event list, used for tracking responses - */ -struct iommu_fault_event { - struct iommu_fault fault; - struct list_head list; -}; - -/** * struct iommu_fault_param - per-device IOMMU fault data - * @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data - * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @users: user counter to manage the lifetime of the data + * @rcu: rcu head for kfree_rcu() + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. + * @faults: holds the pending faults which need response */ struct iommu_fault_param { - iommu_dev_fault_handler_t handler; - void *data; - struct list_head faults; struct mutex lock; + refcount_t users; + struct rcu_head rcu; + + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + + struct list_head partial; + struct list_head faults; }; /** * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data - * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume * @attach_deferred: the dma domain attachment is deferred + * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs + * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; - struct iopf_device_param *iopf_param; + struct iommu_fault_param __rcu *fault_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; u32 max_pasids; u32 attach_deferred:1; + u32 pci_32bit_workaround:1; + u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -442,6 +748,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +/** + * iommu_get_iommu_dev - Get iommu_device for a device + * @dev: an end-point device + * + * Note that this function must be called from the iommu_ops + * to retrieve the iommu_device for a device, which the core code + * guarentees it will not invoke the op without an attached iommu. + */ +static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev) +{ + return dev->iommu->iommu_dev; +} + +#define iommu_get_iommu_dev(dev, type, member) \ + container_of(__iommu_get_iommu_dev(dev), type, member) + static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { @@ -450,17 +772,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) -{ - /* - * Assume that valid ops must be installed if iommu_probe_device() - * has succeeded. The device ops are essentially for internal use - * within the IOMMU subsystem itself, so we should be able to trust - * ourselves not to misuse the helper. - */ - return dev->iommu->iommu_dev->ops; -} - extern int bus_iommu_probe(const struct bus_type *bus); extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); @@ -518,16 +829,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data); - -extern int iommu_unregister_device_fault_handler(struct device *dev); - -extern int iommu_report_device_fault(struct device *dev, - struct iommu_fault_event *evt); -extern int iommu_page_response(struct device *dev, - struct iommu_page_response *msg); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -632,12 +933,35 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return gather && gather->queued; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ + if (gather) + iommu_iotlb_gather_init(gather); + + dirty->bitmap = bitmap; + dirty->gather = gather; +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ + if (dirty->bitmap) + iova_bitmap_set(dirty->bitmap, iova, length); + + if (dirty->gather) + iommu_iotlb_gather_add_range(dirty->gather, iova, length); +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data @@ -668,13 +992,21 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; +}; + +struct iommu_mm_data { + u32 pasid; + struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -698,11 +1030,9 @@ static inline void *dev_iommu_priv_get(struct device *dev) return NULL; } -static inline void dev_iommu_priv_set(struct device *dev, void *priv) -{ - dev->iommu->priv = priv; -} +void dev_iommu_priv_set(struct device *dev, void *priv); +extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); @@ -718,8 +1048,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group); int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct iommu_domain *domain, @@ -727,6 +1055,8 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct iommu_domain * iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type); +ioasid_t iommu_alloc_global_pasid(struct device *dev); +void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -735,6 +1065,8 @@ struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; struct iommu_iotlb_gather {}; +struct iommu_dirty_bitmap {}; +struct iommu_dirty_ops {}; static inline bool iommu_present(const struct bus_type *bus) { @@ -904,31 +1236,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - return -ENODEV; -} - -static inline int iommu_unregister_device_fault_handler(struct device *dev) -{ - return 0; -} - -static inline -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) -{ - return -ENODEV; -} - -static inline int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; @@ -967,6 +1274,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) return false; } +static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty, + struct iova_bitmap *bitmap, + struct iommu_iotlb_gather *gather) +{ +} + +static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty, + unsigned long iova, + unsigned long length) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } @@ -1010,7 +1329,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, } static inline -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { return NULL; } @@ -1065,12 +1384,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) return -ENODEV; } -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -1088,8 +1401,23 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, { return NULL; } + +static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + return IOMMU_PASID_INVALID; +} + +static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +void iommu_group_mutex_assert(struct device *dev); +#else +static inline void iommu_group_mutex_assert(struct device *dev) +{ +} +#endif + /** * iommu_map_sgtable - Map the given buffer to the IOMMU domain * @domain: The IOMMU domain to perform the mapping @@ -1100,7 +1428,7 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. */ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, @@ -1171,20 +1499,40 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream return false; } -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_IOMMU_MM_DATA static inline void mm_pasid_init(struct mm_struct *mm) { - mm->pasid = IOMMU_PASID_INVALID; + /* + * During dup_mm(), a new mm will be memcpy'd from an old one and that makes + * the new mm and the old one point to a same iommu_mm instance. When either + * one of the two mms gets released, the iommu_mm instance is freed, leaving + * the other mm running into a use-after-free/double-free problem. To avoid + * the problem, zeroing the iommu_mm pointer of a new mm is needed here. + */ + mm->iommu_mm = NULL; } + static inline bool mm_valid_pasid(struct mm_struct *mm) { - return mm->pasid != IOMMU_PASID_INVALID; + return READ_ONCE(mm->iommu_mm); } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + struct iommu_mm_data *iommu_mm = READ_ONCE(mm->iommu_mm); + + if (!iommu_mm) + return IOMMU_PASID_INVALID; + return iommu_mm->pasid; +} + void mm_pasid_drop(struct mm_struct *mm); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1202,7 +1550,75 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) } static inline void mm_pasid_init(struct mm_struct *mm) {} static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; } + +static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) +{ + return IOMMU_PASID_INVALID; +} + static inline void mm_pasid_drop(struct mm_struct *mm) {} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} #endif /* CONFIG_IOMMU_SVA */ +#ifdef CONFIG_IOMMU_IOPF +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); +void iopf_free_group(struct iopf_group *group); +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); +#else +static inline int +iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} + +static inline void iopf_free_group(struct iopf_group *group) +{ +} + +static inline void +iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +{ +} + +static inline void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ +} +#endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 1129a36a74c4..ffc3a949f837 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,14 +16,19 @@ struct page; struct iommufd_ctx; struct iommufd_access; struct file; +struct iommu_group; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); +u32 iommufd_device_to_id(struct iommufd_device *idev); + struct iommufd_access_ops { u8 needs_pin_pages : 1; void (*unmap)(void *data, unsigned long iova, unsigned long length); @@ -44,12 +49,16 @@ iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id); void iommufd_access_destroy(struct iommufd_access *access); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); +int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id); +void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +struct iommufd_ctx *iommufd_ctx_from_fd(int fd); void iommufd_ctx_put(struct iommufd_ctx *ictx); +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 25d768d48970..db7fe25f3370 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res) return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ -static inline bool resource_contains(struct resource *r1, struct resource *r2) +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; @@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) } /* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +static inline bool resource_overlaps(const struct resource *r1, const struct resource *r2) { return r1->start <= r2->end && r1->end >= r2->start; } -static inline bool -resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_intersection(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource return true; } -static inline bool -resource_union(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_union(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -331,6 +331,9 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int +walk_system_ram_res_rev(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)); +extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 7578d4f6a969..db1249cd9692 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task) } #ifdef CONFIG_BLOCK -int __get_task_ioprio(struct task_struct *p); +/* + * If the task has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + * + * Expected to be called for current task or with task_lock() held to keep + * io_context stable. + */ +static inline int __get_task_ioprio(struct task_struct *p) +{ + struct io_context *ioc = p->io_context; + int prio; + + if (!ioc) + return IOPRIO_DEFAULT; + + if (p != current) + lockdep_assert_held(&p->alloc_lock); + + prio = ioc->ioprio; + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); + return prio; +} #else static inline int __get_task_ioprio(struct task_struct *p) { diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h new file mode 100644 index 000000000000..f0e99fc7dd8b --- /dev/null +++ b/include/linux/ioremap.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IOREMAP_H +#define _LINUX_IOREMAP_H + +#include <linux/kasan.h> +#include <asm/pgtable.h> + +#if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP) +/* + * Ioremap often, but not always uses the generic vmalloc area. E.g on + * Power ARCH, it could have different ioremap space. + */ +#ifndef IOREMAP_START +#define IOREMAP_START VMALLOC_START +#define IOREMAP_END VMALLOC_END +#endif +static inline bool is_ioremap_addr(const void *x) +{ + unsigned long addr = (unsigned long)kasan_reset_tag(x); + + return addr >= IOREMAP_START && addr < IOREMAP_END; +} +#else +static inline bool is_ioremap_addr(const void *x) +{ + return false; +} +#endif + +#endif /* _LINUX_IOREMAP_H */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index cb71aa616bd3..4696abfd311c 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -34,7 +34,7 @@ * the same driver for allocation, read and write operations. * * Open-coding access to :c:type:`struct iosys_map <iosys_map>` is considered - * bad style. Rather then accessing its fields directly, use one of the provided + * bad style. Rather than accessing its fields directly, use one of the provided * helper functions, or implement your own. For example, instances of * :c:type:`struct iosys_map <iosys_map>` can be initialized statically with * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These @@ -168,9 +168,9 @@ struct iosys_map { * about the use of uninitialized variable. */ #define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \ - struct iosys_map copy = *map_; \ - iosys_map_incr(©, offset_); \ - copy; \ + struct iosys_map copy_ = *map_; \ + iosys_map_incr(©_, offset_); \ + copy_; \ }) /** @@ -391,14 +391,14 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * Returns: * The value read from the mapping. */ -#define iosys_map_rd(map__, offset__, type__) ({ \ - type__ val; \ - if ((map__)->is_iomem) { \ - __iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\ - } else { \ - __iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__); \ - } \ - val; \ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val_; \ + if ((map__)->is_iomem) { \ + __iosys_map_rd_io(val_, (map__)->vaddr_iomem + (offset__), type__); \ + } else { \ + __iosys_map_rd_sys(val_, (map__)->vaddr + (offset__), type__); \ + } \ + val_; \ }) /** @@ -413,20 +413,20 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * or if pointer may be unaligned (and problematic for the architecture * supported), use iosys_map_memcpy_to() */ -#define iosys_map_wr(map__, offset__, type__, val__) ({ \ - type__ val = (val__); \ - if ((map__)->is_iomem) { \ - __iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\ - } else { \ - __iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__); \ - } \ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val_ = (val__); \ + if ((map__)->is_iomem) { \ + __iosys_map_wr_io(val_, (map__)->vaddr_iomem + (offset__), type__); \ + } else { \ + __iosys_map_wr_sys(val_, (map__)->vaddr + (offset__), type__); \ + } \ }) /** * iosys_map_rd_field - Read a member from a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read @@ -485,16 +485,16 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * The value read from the mapping. */ #define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \ - struct_type__ *s; \ + struct_type__ *s_; \ iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \ - typeof(s->field__)); \ + typeof(s_->field__)); \ }) /** * iosys_map_wr_field - Write to a member of a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read @@ -508,9 +508,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * usage and memory layout. */ #define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ - struct_type__ *s; \ + struct_type__ *s_; \ iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \ - typeof(s->field__), val__); \ + typeof(s_->field__), val__); \ }) #endif /* __IOSYS_MAP_H__ */ diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h new file mode 100644 index 000000000000..270454a6703d --- /dev/null +++ b/include/linux/iov_iter.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* I/O iterator iteration building functions. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_IOV_ITER_H +#define _LINUX_IOV_ITER_H + +#include <linux/uio.h> +#include <linux/bvec.h> + +typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2); +typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, + void *priv, void *priv2); + +/* + * Handle ITER_UBUF. + */ +static __always_inline +size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + void __user *base = iter->ubuf; + size_t progress = 0, remain; + + remain = step(base + iter->iov_offset, 0, len, priv, priv2); + progress = len - remain; + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_IOVEC. + */ +static __always_inline +size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + const struct iovec *p = iter->__iov; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->__iov; + iter->__iov = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_KVEC. + */ +static __always_inline +size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct kvec *p = iter->kvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->kvec; + iter->kvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_BVEC. + */ +static __always_inline +size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct bio_vec *p = iter->bvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t offset = p->bv_offset + skip, part; + void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); + + part = min3(len, + (size_t)(p->bv_len - skip), + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); + remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); + kunmap_local(kaddr); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= p->bv_len) { + skip = 0; + p++; + } + if (remain) + break; + } while (len); + + iter->nr_segs -= p - iter->bvec; + iter->bvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_XARRAY. + */ +static __always_inline +size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + struct folio *folio; + size_t progress = 0; + loff_t start = iter->xarray_start + iter->iov_offset; + pgoff_t index = start / PAGE_SIZE; + XA_STATE(xas, iter->xarray, index); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t remain, consumed, offset, part, flen; + + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + offset = offset_in_folio(folio, start + progress); + flen = min(folio_size(folio) - offset, len); + + while (flen) { + void *base = kmap_local_folio(folio, offset); + + part = min_t(size_t, flen, + PAGE_SIZE - offset_in_page(offset)); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + + consumed = part - remain; + progress += consumed; + len -= consumed; + + if (remain || len == 0) + goto out; + flen -= consumed; + offset += consumed; + } + } + +out: + rcu_read_unlock(); + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_DISCARD. + */ +static __always_inline +size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + size_t progress = len; + + iter->count -= progress; + return progress; +} + +/** + * iterate_and_advance2 - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * Two step functions, @step and @ustep, must be provided, one for handling + * mapped kernel addresses and the other is given user addresses which have the + * potential to fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_ustep_f ustep, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + + if (likely(iter_is_ubuf(iter))) + return iterate_ubuf(iter, len, priv, priv2, ustep); + if (likely(iter_is_iovec(iter))) + return iterate_iovec(iter, len, priv, priv2, ustep); + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + +/** + * iterate_and_advance - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * As iterate_and_advance2(), but priv2 is always NULL. + */ +static __always_inline +size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, NULL, ustep, step); +} + +#endif /* _LINUX_IOV_ITER_H */ diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h index c006cf0a25f3..1c338f5e5b7a 100644 --- a/include/linux/iova_bitmap.h +++ b/include/linux/iova_bitmap.h @@ -7,6 +7,7 @@ #define _IOVA_BITMAP_H_ #include <linux/types.h> +#include <linux/errno.h> struct iova_bitmap; @@ -14,6 +15,7 @@ typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, unsigned long iova, size_t length, void *opaque); +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, unsigned long page_size, u64 __user *data); @@ -22,5 +24,29 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, iova_bitmap_fn_t fn); void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length); +#else +static inline struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, + size_t length, + unsigned long page_size, + u64 __user *data) +{ + return NULL; +} + +static inline void iova_bitmap_free(struct iova_bitmap *bitmap) +{ +} + +static inline int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn) +{ + return -EOPNOTSUPP; +} + +static inline void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length) +{ +} +#endif #endif diff --git a/include/linux/ipc.h b/include/linux/ipc.h index e1c9eea6015b..9b1434247aab 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -2,7 +2,7 @@ #ifndef _LINUX_IPC_H #define _LINUX_IPC_H -#include <linux/spinlock.h> +#include <linux/spinlock_types.h> #include <linux/uidgid.h> #include <linux/rhashtable-types.h> #include <uapi/linux/ipc.h> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 839247a4f48e..383a0ea2ab91 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include <uapi/linux/ipv6.h> +#include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. */ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -27,12 +35,14 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; + __s32 accept_ra_min_lft; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF @@ -43,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -53,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -74,13 +82,13 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; @@ -146,6 +154,7 @@ struct inet6_skb_parm { #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 +#define IP6SKB_MULTIPATH 1024 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -199,14 +208,7 @@ struct inet6_cork { u8 tclass; }; -/** - * struct ipv6_pinfo - ipv6 private area - * - * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc) - * this _must_ be the last member, so that inet6_sk_generic - * is able to calculate its offset from the base struct sock - * by using the struct proto->slab_obj_size member. -acme - */ +/* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; @@ -218,28 +220,9 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; -#else - __u16 mc_loop:1, - __unused_2:6; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; @@ -267,21 +250,11 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, - repflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, - mc_all:1, - recverr_rfc4884:1, - rtalert_isolate:1; + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; @@ -298,6 +271,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ @@ -306,19 +291,19 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; - /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; - /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; diff --git a/include/linux/irq.h b/include/linux/irq.h index d8a6fdce9373..97baa937ab5b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -179,7 +179,7 @@ struct irq_common_data { struct irq_data { u32 mask; unsigned int irq; - unsigned long hwirq; + irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; @@ -215,8 +215,6 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode - * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change - * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call @@ -247,11 +245,10 @@ enum { IRQD_SINGLE_TARGET = BIT(24), IRQD_DEFAULT_TRIGGER_SET = BIT(25), IRQD_CAN_RESERVE = BIT(26), - IRQD_MSI_NOMASK_QUIRK = BIT(27), - IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), - IRQD_AFFINITY_ON_ACTIVATE = BIT(29), - IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), - IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), + IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), + IRQD_AFFINITY_ON_ACTIVATE = BIT(28), + IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } -static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; -} - -static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; -} - -static inline bool irqd_msi_nomask_quirk(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; -} - static inline void irqd_set_affinity_on_activate(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 8cd11a223260..136f2980cba3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); void irq_work_single(void *arg); + +void arch_irq_work_raise(void); + #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51c254b7fec2..21ecf582a0fe 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -174,7 +174,7 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct irq_data __rcu *revmap[]; + struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ @@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#ifdef CONFIG_GENERIC_MSI_IRQ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type); +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); +#else +static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} +static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + WARN_ON_ONCE(1); +} +#endif + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index c29921fd8cd1..5c1fe6f1fcde 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -26,6 +26,8 @@ enum irq_domain_bus_token { DOMAIN_BUS_DMAR, DOMAIN_BUS_AMDVI, DOMAIN_BUS_PCI_DEVICE_IMS, + DOMAIN_BUS_DEVICE_MSI, + DOMAIN_BUS_WIRED_TO_MSI, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b665c32f5fe..3f003d5fde53 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,6 +12,7 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#include <linux/irqflags_types.h> #include <linux/typecheck.h> #include <linux/cleanup.h> #include <asm/irqflags.h> @@ -34,19 +35,6 @@ #ifdef CONFIG_TRACE_IRQFLAGS -/* Per-task IRQ trace events information. */ -struct irqtrace_events { - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; -}; - DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); @@ -126,7 +114,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/irqflags_types.h b/include/linux/irqflags_types.h new file mode 100644 index 000000000000..c13f0d915097 --- /dev/null +++ b/include/linux/irqflags_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IRQFLAGS_TYPES_H +#define _LINUX_IRQFLAGS_TYPES_H + +#ifdef CONFIG_TRACE_IRQFLAGS + +/* Per-task IRQ trace events information. */ +struct irqtrace_events { + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; +}; + +#endif + +#endif /* _LINUX_IRQFLAGS_TYPES_H */ diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index c30f454a9518..72dd1eb3a0e7 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ */ struct irq_desc; -struct irq_data; + typedef void (*irq_flow_handler_t)(struct irq_desc *desc); #endif diff --git a/include/linux/ism.h b/include/linux/ism.h index 9a4c204df3da..5428edd90982 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -86,7 +86,6 @@ int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); -u8 *ism_get_seid(void); const struct smcd_ops *ism_get_smcd_ops(void); diff --git a/include/linux/iversion.h b/include/linux/iversion.h index f174ff1b59ee..8f972eaca2ed 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -256,7 +256,7 @@ inode_peek_iversion(const struct inode *inode) * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: */ -static inline u64 time_to_chattr(struct timespec64 *t) +static inline u64 time_to_chattr(const struct timespec64 *t) { u64 chattr = t->tv_sec; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 44c298aa58d4..971f3e826e15 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -631,11 +631,6 @@ struct transaction_s struct list_head t_inode_list; /* - * Protects info related to handles - */ - spinlock_t t_handle_lock; - - /* * Longest time some handle had to wait for running transaction */ unsigned long t_max_wait; @@ -761,11 +756,6 @@ struct journal_s unsigned long j_flags; /** - * @j_atomic_flags: Atomic journaling state flags. - */ - unsigned long j_atomic_flags; - - /** * @j_errno: * * Is there an outstanding uncleared error on the journal (from a prior @@ -891,7 +881,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: @@ -1004,6 +994,13 @@ struct journal_s struct block_device *j_fs_dev; /** + * @j_fs_dev_wb_err: + * + * Records the errseq of the client fs's backing block device. + */ + errseq_t j_fs_dev_wb_err; + + /** * @j_total_len: Total maximum capacity of the journal region on disk. */ unsigned int j_total_len; @@ -1379,6 +1376,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) +/* Journal high priority write IO operation flags */ +#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) + /* * Journal flag definitions */ @@ -1402,12 +1402,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) JBD2_JOURNAL_FLUSH_ZEROOUT) /* - * Journal atomic flag definitions - */ -#define JBD2_CHECKPOINT_IO_ERROR 0x001 /* Detect io error while writing - * buffer back to disk */ - -/* * Function declarations for the journaling transaction and buffer * management */ @@ -1700,6 +1694,25 @@ static inline void jbd2_journal_abort_handle(handle_t *handle) handle->h_aborted = 1; } +static inline void jbd2_init_fs_dev_write_error(journal_t *journal) +{ + struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + + /* + * Save the original wb_err value of client fs's bdev mapping which + * could be used to detect the client fs's metadata async write error. + */ + errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err); +} + +static inline int jbd2_check_fs_dev_write_error(journal_t *journal) +{ + struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + + return errseq_check(&mapping->wb_err, + READ_ONCE(journal->j_fs_dev_wb_err)); +} + #endif /* __KERNEL__ */ /* Comparison functions for transaction IDs: perform comparisons using diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5e13f801c902..d9f1435a5a13 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -72,9 +72,15 @@ extern int register_refined_jiffies(long clock_tick_rate); #endif /* - * The 64-bit value is not atomic - you MUST NOT read it + * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. + * + * jiffies and jiffies_64 are at the same address for little-endian systems + * and for 64-bit big-endian systems. + * On 32-bit big-endian systems, jiffies is the lower 32 bits of jiffies_64 + * (i.e., at address @jiffies_64 + 4). + * See arch/ARCH/kernel/vmlinux.lds.S */ extern u64 __cacheline_aligned_in_smp jiffies_64; extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; @@ -82,46 +88,94 @@ extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffi #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); #else +/** + * get_jiffies_64 - read the 64-bit non-atomic jiffies_64 value + * + * When BITS_PER_LONG < 64, this uses sequence number sampling using + * jiffies_lock to protect the 64-bit read. + * + * Return: current 64-bit jiffies value + */ static inline u64 get_jiffies_64(void) { return (u64)jiffies; } #endif -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you won't have to - * alter your driver code. +/** + * DOC: General information about time_* inlines + * + * These inlines deal with timer wrapping correctly. You are strongly encouraged + * to use them: * - * time_after(a,b) returns true if the time a is after time b. + * #. Because people otherwise forget + * #. Because if the timer wrap changes in future you won't have to alter your + * driver code. + */ + +/** + * time_after - returns true if the time a is after time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long * * Do this with "<0" and ">=0" to only test the sign of the result. A * good compiler would generate better code (and a really good compiler * wouldn't care). Gcc is currently neither. + * + * Return: %true is time a is after time b, otherwise %false. */ #define time_after(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((b) - (a)) < 0)) +/** + * time_before - returns true if the time a is before time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is before time b, otherwise %false. + */ #define time_before(a,b) time_after(b,a) +/** + * time_after_eq - returns true if the time a is after or the same as time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is after or the same as time b, otherwise %false. + */ #define time_after_eq(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((a) - (b)) >= 0)) +/** + * time_before_eq - returns true if the time a is before or the same as time b. + * @a: first comparable as unsigned long + * @b: second comparable as unsigned long + * + * Return: %true is time a is before or the same as time b, otherwise %false. + */ #define time_before_eq(a,b) time_after_eq(b,a) -/* - * Calculate whether a is in the range of [b, c]. +/** + * time_in_range - Calculate whether a is in the range of [b, c]. + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c], otherwise %false. */ #define time_in_range(a,b,c) \ (time_after_eq(a,b) && \ time_before_eq(a,c)) -/* - * Calculate whether a is in the range of [b, c). +/** + * time_in_range_open - Calculate whether a is in the range of [b, c). + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c), otherwise %false. */ #define time_in_range_open(a,b,c) \ (time_after_eq(a,b) && \ @@ -129,45 +183,138 @@ static inline u64 get_jiffies_64(void) /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of - * get_jiffies_64() */ + * get_jiffies_64()). */ + +/** + * time_after64 - returns true if the time a is after time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is after time b, otherwise %false. + */ #define time_after64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((b) - (a)) < 0)) +/** + * time_before64 - returns true if the time a is before time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is before time b, otherwise %false. + */ #define time_before64(a,b) time_after64(b,a) +/** + * time_after_eq64 - returns true if the time a is after or the same as time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is after or the same as time b, otherwise %false. + */ #define time_after_eq64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((a) - (b)) >= 0)) +/** + * time_before_eq64 - returns true if the time a is before or the same as time b. + * @a: first comparable as __u64 + * @b: second comparable as __u64 + * + * This must be used when utilizing jiffies_64 (i.e. return value of + * get_jiffies_64()). + * + * Return: %true is time a is before or the same as time b, otherwise %false. + */ #define time_before_eq64(a,b) time_after_eq64(b,a) +/** + * time_in_range64 - Calculate whether a is in the range of [b, c]. + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c], otherwise %false. + */ #define time_in_range64(a, b, c) \ (time_after_eq64(a, b) && \ time_before_eq64(a, c)) /* - * These four macros compare jiffies and 'a' for convenience. + * These eight macros compare jiffies[_64] and 'a' for convenience. */ -/* time_is_before_jiffies(a) return true if a is before jiffies */ +/** + * time_is_before_jiffies - return true if a is before jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is before jiffies, otherwise %false. + */ #define time_is_before_jiffies(a) time_after(jiffies, a) +/** + * time_is_before_jiffies64 - return true if a is before jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is before jiffies_64, otherwise %false. + */ #define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a) -/* time_is_after_jiffies(a) return true if a is after jiffies */ +/** + * time_is_after_jiffies - return true if a is after jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is after jiffies, otherwise %false. + */ #define time_is_after_jiffies(a) time_before(jiffies, a) +/** + * time_is_after_jiffies64 - return true if a is after jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is after jiffies_64, otherwise %false. + */ #define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a) -/* time_is_before_eq_jiffies(a) return true if a is before or equal to jiffies*/ +/** + * time_is_before_eq_jiffies - return true if a is before or equal to jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is before or the same as jiffies, otherwise %false. + */ #define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) +/** + * time_is_before_eq_jiffies64 - return true if a is before or equal to jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is before or the same jiffies_64, otherwise %false. + */ #define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a) -/* time_is_after_eq_jiffies(a) return true if a is after or equal to jiffies*/ +/** + * time_is_after_eq_jiffies - return true if a is after or equal to jiffies + * @a: time (unsigned long) to compare to jiffies + * + * Return: %true is time a is after or the same as jiffies, otherwise %false. + */ #define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) +/** + * time_is_after_eq_jiffies64 - return true if a is after or equal to jiffies_64 + * @a: time (__u64) to compare to jiffies_64 + * + * Return: %true is time a is after or the same as jiffies_64, otherwise %false. + */ #define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a) /* - * Have the 32 bit jiffies value wrap 5 minutes after boot + * Have the 32-bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) @@ -278,7 +425,7 @@ extern unsigned long preset_lpj; #if BITS_PER_LONG < 64 # define MAX_SEC_IN_JIFFIES \ (long)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC) / NSEC_PER_SEC) -#else /* take care of overflow on 64 bits machines */ +#else /* take care of overflow on 64-bit machines */ # define MAX_SEC_IN_JIFFIES \ (SH_DIV((MAX_JIFFY_OFFSET >> SEC_JIFFIE_SC) * TICK_NSEC, NSEC_PER_SEC, 1) - 1) @@ -290,6 +437,12 @@ extern unsigned long preset_lpj; extern unsigned int jiffies_to_msecs(const unsigned long j); extern unsigned int jiffies_to_usecs(const unsigned long j); +/** + * jiffies_to_nsecs - Convert jiffies to nanoseconds + * @j: jiffies value + * + * Return: nanoseconds value + */ static inline u64 jiffies_to_nsecs(const unsigned long j) { return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; @@ -353,12 +506,14 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * * msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the - * code, __msecs_to_jiffies() is called if the value passed does not + * code. __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the HZ range specific helpers _msecs_to_jiffies() are called both + * The HZ range specific helpers _msecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. + * + * Return: jiffies value */ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) { @@ -400,12 +555,14 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u) * * usecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the - * code, __usecs_to_jiffies() is called if the value passed does not + * code. __usecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the HZ range specific helpers _usecs_to_jiffies() are called both + * The HZ range specific helpers _usecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. + * + * Return: jiffies value */ static __always_inline unsigned long usecs_to_jiffies(const unsigned int u) { @@ -422,6 +579,7 @@ extern unsigned long timespec64_to_jiffies(const struct timespec64 *value); extern void jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value); extern clock_t jiffies_to_clock_t(unsigned long x); + static inline clock_t jiffies_delta_to_clock_t(long delta) { return jiffies_to_clock_t(max(0L, delta)); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..70d6a8f6e25d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -4,6 +4,7 @@ #include <linux/bug.h> #include <linux/kasan-enabled.h> +#include <linux/kasan-tags.h> #include <linux/kernel.h> #include <linux/static_key.h> #include <linux/types.h> @@ -54,11 +55,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -127,20 +130,39 @@ static __always_inline void kasan_poison_slab(struct slab *slab) __kasan_poison_slab(slab); } -void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); -static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache, +void __kasan_unpoison_new_object(struct kmem_cache *cache, void *object); +/** + * kasan_unpoison_new_object - Temporarily unpoison a new slab object. + * @cache: Cache the object belong to. + * @object: Pointer to the object. + * + * This function is intended for the slab allocator's internal use. It + * temporarily unpoisons an object from a newly allocated slab without doing + * anything else. The object must later be repoisoned by + * kasan_poison_new_object(). + */ +static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) - __kasan_unpoison_object_data(cache, object); + __kasan_unpoison_new_object(cache, object); } -void __kasan_poison_object_data(struct kmem_cache *cache, void *object); -static __always_inline void kasan_poison_object_data(struct kmem_cache *cache, +void __kasan_poison_new_object(struct kmem_cache *cache, void *object); +/** + * kasan_unpoison_new_object - Repoison a new slab object. + * @cache: Cache the object belong to. + * @object: Pointer to the object. + * + * This function is intended for the slab allocator's internal use. It + * repoisons an object that was previously unpoisoned by + * kasan_unpoison_new_object() without doing anything else. + */ +static __always_inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) - __kasan_poison_object_data(cache, object); + __kasan_poison_new_object(cache, object); } void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, @@ -170,13 +192,6 @@ static __always_inline void kasan_kfree_large(void *ptr) __kasan_kfree_large(ptr, _RET_IP_); } -void __kasan_slab_free_mempool(void *ptr, unsigned long ip); -static __always_inline void kasan_slab_free_mempool(void *ptr) -{ - if (kasan_enabled()) - __kasan_slab_free_mempool(ptr, _RET_IP_); -} - void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init); static __always_inline void * __must_check kasan_slab_alloc( @@ -217,6 +232,113 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, return (void *)object; } +bool __kasan_mempool_poison_pages(struct page *page, unsigned int order, + unsigned long ip); +/** + * kasan_mempool_poison_pages - Check and poison a mempool page allocation. + * @page: Pointer to the page allocation. + * @order: Order of the allocation. + * + * This function is intended for kernel subsystems that cache page allocations + * to reuse them instead of freeing them back to page_alloc (e.g. mempool). + * + * This function is similar to kasan_mempool_poison_object() but operates on + * page allocations. + * + * Before the poisoned allocation can be reused, it must be unpoisoned via + * kasan_mempool_unpoison_pages(). + * + * Return: true if the allocation can be safely reused; false otherwise. + */ +static __always_inline bool kasan_mempool_poison_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + return __kasan_mempool_poison_pages(page, order, _RET_IP_); + return true; +} + +void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, + unsigned long ip); +/** + * kasan_mempool_unpoison_pages - Unpoison a mempool page allocation. + * @page: Pointer to the page allocation. + * @order: Order of the allocation. + * + * This function is intended for kernel subsystems that cache page allocations + * to reuse them instead of freeing them back to page_alloc (e.g. mempool). + * + * This function unpoisons a page allocation that was previously poisoned by + * kasan_mempool_poison_pages() without zeroing the allocation's memory. For + * the tag-based modes, this function assigns a new tag to the allocation. + */ +static __always_inline void kasan_mempool_unpoison_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_mempool_unpoison_pages(page, order, _RET_IP_); +} + +bool __kasan_mempool_poison_object(void *ptr, unsigned long ip); +/** + * kasan_mempool_poison_object - Check and poison a mempool slab allocation. + * @ptr: Pointer to the slab allocation. + * + * This function is intended for kernel subsystems that cache slab allocations + * to reuse them instead of freeing them back to the slab allocator (e.g. + * mempool). + * + * This function poisons a slab allocation and saves a free stack trace for it + * without initializing the allocation's memory and without putting it into the + * quarantine (for the Generic mode). + * + * This function also performs checks to detect double-free and invalid-free + * bugs and reports them. The caller can use the return value of this function + * to find out if the allocation is buggy. + * + * Before the poisoned allocation can be reused, it must be unpoisoned via + * kasan_mempool_unpoison_object(). + * + * This function operates on all slab allocations including large kmalloc + * allocations (the ones returned by kmalloc_large() or by kmalloc() with the + * size > KMALLOC_MAX_SIZE). + * + * Return: true if the allocation can be safely reused; false otherwise. + */ +static __always_inline bool kasan_mempool_poison_object(void *ptr) +{ + if (kasan_enabled()) + return __kasan_mempool_poison_object(ptr, _RET_IP_); + return true; +} + +void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip); +/** + * kasan_mempool_unpoison_object - Unpoison a mempool slab allocation. + * @ptr: Pointer to the slab allocation. + * @size: Size to be unpoisoned. + * + * This function is intended for kernel subsystems that cache slab allocations + * to reuse them instead of freeing them back to the slab allocator (e.g. + * mempool). + * + * This function unpoisons a slab allocation that was previously poisoned via + * kasan_mempool_poison_object() and saves an alloc stack trace for it without + * initializing the allocation's memory. For the tag-based modes, this function + * does not assign a new tag to the allocation and instead restores the + * original tags based on the pointer value. + * + * This function operates on all slab allocations including large kmalloc + * allocations (the ones returned by kmalloc_large() or by kmalloc() with the + * size > KMALLOC_MAX_SIZE). + */ +static __always_inline void kasan_mempool_unpoison_object(void *ptr, + size_t size) +{ + if (kasan_enabled()) + __kasan_mempool_unpoison_object(ptr, size, _RET_IP_); +} + /* * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for * the hardware tag-based mode that doesn't rely on compiler instrumentation. @@ -240,9 +362,9 @@ static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, return false; } static inline void kasan_poison_slab(struct slab *slab) {} -static inline void kasan_unpoison_object_data(struct kmem_cache *cache, +static inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) {} -static inline void kasan_poison_object_data(struct kmem_cache *cache, +static inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) {} static inline void *kasan_init_slab_obj(struct kmem_cache *cache, const void *object) @@ -254,7 +376,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init return false; } static inline void kasan_kfree_large(void *ptr) {} -static inline void kasan_slab_free_mempool(void *ptr) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init) { @@ -274,6 +395,17 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } +static inline bool kasan_mempool_poison_pages(struct page *page, unsigned int order) +{ + return true; +} +static inline void kasan_mempool_unpoison_pages(struct page *page, unsigned int order) {} +static inline bool kasan_mempool_poison_object(void *ptr) +{ + return true; +} +static inline void kasan_mempool_unpoison_object(void *ptr, size_t size) {} + static inline bool kasan_check_byte(const void *address) { return true; @@ -283,8 +415,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC @@ -295,7 +429,6 @@ struct kasan_cache { }; size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); -slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -312,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache, { return 0; } -/* And thus nothing prevents cache merging. */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} /* And no cache-related metadata initialization is required. */ static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, @@ -464,10 +592,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -#ifdef CONFIG_KASAN_INLINE +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_non_canonical_hook(unsigned long addr); -#else /* CONFIG_KASAN_INLINE */ +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_non_canonical_hook(unsigned long addr) { } -#endif /* CONFIG_KASAN_INLINE */ +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0d91e0af0125..be2e8c0a187e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include <linux/stdarg.h> #include <linux/align.h> +#include <linux/array_size.h> #include <linux/limits.h> #include <linux/linkage.h> #include <linux/stddef.h> @@ -29,32 +30,21 @@ #include <linux/panic.h> #include <linux/printk.h> #include <linux/build_bug.h> +#include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/wordpart.h> + #include <asm/byteorder.h> #include <uapi/linux/kernel.h> #define STACK_MAGIC 0xdeadbeef -/** - * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value - * @x: value to repeat - * - * NOTE: @x is not checked for > 0xff; larger values produce odd results. - */ -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 -/** - * ARRAY_SIZE - get the number of elements in array @arr - * @arr: array to be sized - */ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) #define u64_to_user_ptr(x) ( \ @@ -64,34 +54,6 @@ } \ ) -/** - * upper_32_bits - return bits 32-63 of a number - * @n: the number we're accessing - * - * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress - * the "right shift count >= width of type" warning when that quantity is - * 32-bits. - */ -#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) - -/** - * lower_32_bits - return bits 0-31 of a number - * @n: the number we're accessing - */ -#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) - -/** - * upper_16_bits - return bits 16-31 of a number - * @n: the number we're accessing - */ -#define upper_16_bits(n) ((u16)((n) >> 16)) - -/** - * lower_16_bits - return bits 0-15 of a number - * @n: the number we're accessing - */ -#define lower_16_bits(n) ((u16)((n) & 0xffff)) - struct completion; struct user; @@ -203,41 +165,6 @@ static inline void might_fault(void) { } void do_exit(long error_code) __noreturn; -extern int num_to_str(char *buf, int size, - unsigned long long num, unsigned int width); - -/* lib/printf utilities */ - -extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...); -extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list); -extern __printf(3, 4) -int snprintf(char *buf, size_t size, const char *fmt, ...); -extern __printf(3, 0) -int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -extern __printf(3, 4) -int scnprintf(char *buf, size_t size, const char *fmt, ...); -extern __printf(3, 0) -int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); -extern __printf(2, 3) __malloc -char *kasprintf(gfp_t gfp, const char *fmt, ...); -extern __printf(2, 0) __malloc -char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); -extern __printf(2, 0) -const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); - -extern __scanf(2, 3) -int sscanf(const char *, const char *, ...); -extern __scanf(2, 0) -int vsscanf(const char *, const char *, va_list); - -extern int no_hash_pointers_enable(char *str); - -extern int get_option(char **str, int *pint); -extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(const char *ptr, char **retptr); -extern bool parse_option_str(const char *str, const char *option); -extern char *next_arg(char *args, char **param, char **val); - extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); @@ -288,6 +215,7 @@ enum ftrace_dump_mode { DUMP_NONE, DUMP_ALL, DUMP_ORIG, + DUMP_PARAM, }; #ifdef CONFIG_TRACING @@ -457,13 +385,6 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __CONCAT(a, b) a ## b -#define CONCATENATE(a, b) __CONCAT(a, b) - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 73f5c120def8..87c79d076d6d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -206,23 +206,25 @@ struct kernfs_node { const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ + unsigned short flags; + umode_t mode; + union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; - void *priv; - /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; - unsigned short flags; - umode_t mode; + void *priv; struct kernfs_iattrs *iattr; + + struct rcu_head rcu; }; /* @@ -316,6 +318,7 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); + loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* @@ -550,6 +553,10 @@ static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } +static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of, + struct poll_table_struct *pt) +{ return -ENOSYS; } + static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 22b5cd24f581..060835bb82d5 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -15,17 +15,14 @@ #if !defined(__ASSEMBLY__) -#include <linux/crash_core.h> +#include <linux/vmcore_info.h> +#include <linux/crash_reserve.h> #include <asm/io.h> #include <linux/range.h> #include <uapi/linux/kexec.h> #include <linux/verification.h> -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE @@ -33,7 +30,9 @@ extern note_buf_t __percpu *crash_notes; #include <linux/compat.h> #include <linux/ioport.h> #include <linux/module.h> +#include <linux/highmem.h> #include <asm/kexec.h> +#include <linux/crash_core.h> /* Verify architecture specific macros are defined */ @@ -230,21 +229,6 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) } #endif -/* Alignment required for elf header segment */ -#define ELF_CORE_HEADER_ALIGN 4096 - -struct crash_mem { - unsigned int max_nr_ranges; - unsigned int nr_ranges; - struct range ranges[]; -}; - -extern int crash_exclude_mem_range(struct crash_mem *mem, - unsigned long long mstart, - unsigned long long mend); -extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, - void **addr, unsigned long *sz); - #ifndef arch_kexec_apply_relocations_add /* * arch_kexec_apply_relocations_add - apply relocations of type RELA @@ -334,6 +318,10 @@ struct kimage { unsigned int preserve_context : 1; /* If set, we are using file mode kexec syscall */ unsigned int file_mode:1; +#ifdef CONFIG_CRASH_HOTPLUG + /* If set, allow changes to elfcorehdr of kexec_load'd image */ + unsigned int update_elfcorehdr:1; +#endif #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; @@ -360,6 +348,12 @@ struct kimage { struct purgatory_info purgatory_info; #endif +#ifdef CONFIG_CRASH_HOTPLUG + int hp_action; + int elfcorehdr_index; + bool elfcorehdr_updated; +#endif + #ifdef CONFIG_IMA_KEXEC /* Virtual address of IMA measurement buffer for kexec syscall */ void *ima_buffer; @@ -386,13 +380,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image, static inline int machine_kexec_post_load(struct kimage *image) { return 0; } #endif -extern void __crash_kexec(struct pt_regs *); -extern void crash_kexec(struct pt_regs *); -int kexec_should_crash(struct task_struct *); -int kexec_crash_loaded(void); -void crash_save_cpu(struct pt_regs *regs, int cpu); -extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); - extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; @@ -404,36 +391,18 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec flags */ #ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS KEXEC_ON_CRASH +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR) #else -#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR) #endif /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -int crash_shrink_memory(unsigned long new_size); -ssize_t crash_get_memory_size(void); - -#ifndef arch_kexec_protect_crashkres -/* - * Protection mechanism for crashkernel reserved memory after - * the kdump kernel is loaded. - * - * Provide an empty default implementation here -- architecture - * code may override this - */ -static inline void arch_kexec_protect_crashkres(void) { } -#endif - -#ifndef arch_kexec_unprotect_crashkres -static inline void arch_kexec_unprotect_crashkres(void) { } -#endif - #ifndef page_to_boot_pfn static inline unsigned long page_to_boot_pfn(struct page *page) { @@ -490,6 +459,13 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif +extern bool kexec_file_dbg_print; + +#define kexec_dprintk(fmt, ...) \ + printk("%s" fmt, \ + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \ + ##__VA_ARGS__) + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7d985a1dfe4a..5caf3ce82373 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -73,6 +73,7 @@ struct key_type { unsigned int flags; #define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */ +#define KEY_TYPE_INSTANT_REAP 0x00000002 /* Keys of this type don't have a delay after expiring */ /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..943a432da3ae 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -515,6 +515,7 @@ extern void key_init(void); #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) +#define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 726857a4b680..88100cc9caba 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -59,15 +59,16 @@ static __always_inline bool is_kfence_address(const void *addr) } /** - * kfence_alloc_pool() - allocate the KFENCE pool via memblock + * kfence_alloc_pool_and_metadata() - allocate the KFENCE pool and KFENCE + * metadata via memblock */ -void __init kfence_alloc_pool(void); +void __init kfence_alloc_pool_and_metadata(void); /** * kfence_init() - perform KFENCE initialization at boot time * - * Requires that kfence_alloc_pool() was called before. This sets up the - * allocation gate timer, and requires that workqueues are available. + * Requires that kfence_alloc_pool_and_metadata() was called before. This sets + * up the allocation gate timer, and requires that workqueues are available. */ void __init kfence_init(void); @@ -222,8 +223,10 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla #else /* CONFIG_KFENCE */ +#define kfence_sample_interval (0) + static inline bool is_kfence_address(const void *addr) { return false; } -static inline void kfence_alloc_pool(void) { } +static inline void kfence_alloc_pool_and_metadata(void) { } static inline void kfence_init(void) { } static inline void kfence_shutdown_cache(struct kmem_cache *s) { } static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; } diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h index 8bfa6c98176d..929287981afe 100644 --- a/include/linux/kmsan_types.h +++ b/include/linux/kmsan_types.h @@ -9,6 +9,8 @@ #ifndef _LINUX_KMSAN_TYPES_H #define _LINUX_KMSAN_TYPES_H +#include <linux/types.h> + /* These constants are defined in the MSan LLVM instrumentation pass. */ #define KMSAN_RETVAL_SIZE 800 #define KMSAN_PARAM_SIZE 800 diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c392c811d9ad..c8219505a79f 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -38,7 +38,7 @@ extern char uevent_helper[]; #endif /* counter to tag the uevent, read only except for the kobject core */ -extern u64 uevent_seqnum; +extern atomic64_t uevent_seqnum; /* * The actions here must match the index to the string array @@ -69,14 +69,16 @@ struct kobject { const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; -#ifdef CONFIG_DEBUG_KOBJECT_RELEASE - struct delayed_work release; -#endif + unsigned int state_initialized:1; unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; unsigned int uevent_suppress:1; + +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE + struct delayed_work release; +#endif }; __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 85a64cb95d75..0ff44d6633e3 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -26,8 +26,7 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> -#include <linux/refcount.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/rethook.h> #include <asm/kprobes.h> @@ -140,8 +139,8 @@ static inline bool kprobe_ftrace(struct kprobe *p) * */ struct kretprobe_holder { - struct kretprobe *rp; - refcount_t ref; + struct kretprobe __rcu *rp; + struct objpool_head pool; }; struct kretprobe { @@ -154,7 +153,6 @@ struct kretprobe { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook *rh; #else - struct freelist_head freelist; struct kretprobe_holder *rph; #endif }; @@ -165,10 +163,7 @@ struct kretprobe_instance { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook_node node; #else - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; @@ -202,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p); #ifdef CONFIG_KRETPROBE_ON_RETHOOK static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); + /* rethook::data is non-changed field, so that you can access it freely. */ + return (struct kretprobe *)ri->node.rethook->data; } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) { @@ -250,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return READ_ONCE(ri->rph->rp); + return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held()); } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) @@ -450,6 +440,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 899a314bc487..401348e9f92b 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -26,6 +26,22 @@ int ksm_disable(struct mm_struct *mm); int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); +/* + * To identify zeropages that were mapped by KSM, we reuse the dirty bit + * in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when + * deduplicating memory. + */ +#define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte)) + +extern unsigned long ksm_zero_pages; + +static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) +{ + if (is_ksm_zero_pte(pte)) { + ksm_zero_pages--; + mm->ksm_zero_pages--; + } +} static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { @@ -60,8 +76,8 @@ static inline void ksm_exit(struct mm_struct *mm) * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? */ -struct page *ksm_might_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); +struct folio *ksm_might_need_to_copy(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr); void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); @@ -95,6 +111,10 @@ static inline void ksm_exit(struct mm_struct *mm) { } +static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) +{ +} + #ifdef CONFIG_MEMORY_FAILURE static inline void collect_procs_ksm(struct page *page, struct list_head *to_kill, int force_early) @@ -109,10 +129,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address) +static inline struct folio *ksm_might_need_to_copy(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr) { - return page; + return folio; } static inline void rmap_walk_ksm(struct folio *folio, diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 529974e22ea7..7fcf29a4e0de 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -147,9 +147,4 @@ extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -static inline int strtobool(const char *s, bool *res) -{ - return kstrtobool(s, res); -} - #endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index f1f95a71a4bc..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,9 +86,9 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); -bool __kthread_should_park(struct task_struct *k); bool kthread_should_stop_or_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_func(struct task_struct *k); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 73f20deb497d..3a4e723eae0f 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -21,12 +21,10 @@ #ifndef _LINUX_KTIME_H #define _LINUX_KTIME_H -#include <linux/time.h> -#include <linux/jiffies.h> #include <asm/bug.h> - -/* Nanosecond scalar representation for kernel time values */ -typedef s64 ktime_t; +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/types.h> /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..48f31dcd318a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -80,8 +80,8 @@ /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -#ifndef KVM_ADDRESS_SPACE_NUM -#define KVM_ADDRESS_SPACE_NUM 1 +#ifndef KVM_MAX_NR_ADDRESS_SPACES +#define KVM_MAX_NR_ADDRESS_SPACES 1 #endif /* @@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr) #endif +static inline bool kvm_is_error_gpa(gpa_t gpa) +{ + return gpa == INVALID_GPA; +} + #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) static inline bool is_error_page(struct page *page) @@ -190,8 +195,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except); -bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, - unsigned long *vcpu_bitmap); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -240,7 +243,6 @@ struct kvm_async_pf { struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; - struct mm_struct *mm; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; @@ -255,12 +257,17 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER +union kvm_mmu_notifier_arg { + pte_t pte; + unsigned long attributes; +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; - pte_t pte; + union kvm_mmu_notifier_arg arg; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); @@ -586,8 +593,20 @@ struct kvm_memory_slot { u32 flags; short id; u16 as_id; + +#ifdef CONFIG_KVM_PRIVATE_MEM + struct { + struct file __rcu *file; + pgoff_t pgoff; + } gmem; +#endif }; +static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +{ + return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); +} + static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -662,7 +681,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif @@ -675,13 +694,29 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #define KVM_MEM_SLOTS_NUM SHRT_MAX #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) -#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +#if KVM_MAX_NR_ADDRESS_SPACES == 1 +static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm) +{ + return KVM_MAX_NR_ADDRESS_SPACES; +} + static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) { return 0; } #endif +/* + * Arch code must define kvm_arch_has_private_mem if support for private memory + * is enabled. + */ +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +static inline bool kvm_arch_has_private_mem(struct kvm *kvm) +{ + return false; +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -719,9 +754,9 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; /* The two memslot sets - active and inactive (per address space) */ - struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2]; + struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ - struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an @@ -751,7 +786,7 @@ struct kvm { struct list_head vm_list; struct mutex lock; struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; -#ifdef CONFIG_HAVE_KVM_EVENTFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP struct { spinlock_t lock; struct list_head items; @@ -759,8 +794,8 @@ struct kvm { struct list_head resampler_list; struct mutex resampler_lock; } irqfds; - struct list_head ioeventfds; #endif + struct list_head ioeventfds; struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; @@ -776,17 +811,16 @@ struct kvm { * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; -#endif -#ifdef CONFIG_HAVE_KVM_IRQFD + struct hlist_head irq_ack_notifier_list; #endif -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; - unsigned long mmu_invalidate_range_start; - unsigned long mmu_invalidate_range_end; + gfn_t mmu_invalidate_range_start; + gfn_t mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -805,6 +839,10 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + /* Protected by slots_locks (for writes) and RCU (for reads) */ + struct xarray mem_attr_array; +#endif char stats_id[KVM_STATS_NAME_SIZE]; }; @@ -865,6 +903,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +/* + * Note, "data corruption" refers to corruption of host kernel data structures, + * not guest data. Guest data corruption, suspected or confirmed, that is tied + * and contained to a single VM should *never* BUG() and potentially panic the + * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure + * is corrupted and that corruption can have a cascading effect to other parts + * of the hosts and/or to other VMs. + */ +#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ +({ \ + bool __ret = !!(cond); \ + \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(__ret); \ + else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ + kvm_vm_bugged(kvm); \ + unlikely(__ret); \ +}) + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU @@ -944,7 +1001,7 @@ static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) } #endif -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -968,7 +1025,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); + as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); @@ -1125,9 +1182,9 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1266,21 +1323,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. - * @vcpu: vCPU to be used for marking pages dirty and to be woken on - * invalidation. - * @usage: indicates if the resulting host physical PFN is used while - * the @vcpu is IN_GUEST_MODE (in which case invalidation of - * the cache from MMU notifiers---but not for KVM memslot - * changes!---will also force @vcpu to exit the guest and - * refresh the cache); and/or if the PFN used directly - * by KVM (and thus needs a kernel virtual mapping). * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest @@ -1301,6 +1349,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** + * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. + * + * @gpc: struct gfn_to_pfn_cache object. + * @hva: userspace virtual address to map. + * @len: sanity check; the range being access must fit a single page. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * The semantics of this function are the same as those of kvm_gpc_activate(). It + * merely bypasses a layer of address translation. + */ +int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); + +/** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. @@ -1346,6 +1410,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); +static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && !kvm_is_error_gpa(gpc->gpa); +} + +static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && kvm_is_error_gpa(gpc->gpa); +} + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); @@ -1359,6 +1433,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1368,10 +1445,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm); +void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); +void kvm_mmu_invalidate_end(struct kvm *kvm); +bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1387,10 +1464,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); @@ -1452,9 +1526,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); -int kvm_arch_create_vm_debugfs(struct kvm *kvm); +void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* @@ -1479,11 +1554,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +#endif + +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA @@ -1723,11 +1810,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } -static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - return kvm_is_error_hva(hva); + return !kvm_is_error_hva(hva); +} + +static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) +{ + lockdep_assert_held(&gpc->lock); + + if (!gpc->memslot) + return; + + mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { @@ -1914,7 +2011,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -1937,9 +2034,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) return 0; } -static inline int mmu_invalidate_retry_hva(struct kvm *kvm, +static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, unsigned long mmu_seq, - unsigned long hva) + gfn_t gfn) { lockdep_assert_held(&kvm->mmu_lock); /* @@ -1948,14 +2045,50 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm, * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_invalidate_in_progress) && - hva >= kvm->mmu_invalidate_range_start && - hva < kvm->mmu_invalidate_range_end) - return 1; + if (unlikely(kvm->mmu_invalidate_in_progress)) { + /* + * Dropping mmu_lock after bumping mmu_invalidate_in_progress + * but before updating the range is a KVM bug. + */ + if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA || + kvm->mmu_invalidate_range_end == INVALID_GPA)) + return 1; + + if (gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return 1; + } + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } + +/* + * This lockless version of the range-based retry check *must* be paired with a + * call to the locked version after acquiring mmu_lock, i.e. this is safe to + * use only as a pre-check to avoid contending mmu_lock. This version *will* + * get false negatives and false positives. + */ +static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, + unsigned long mmu_seq, + gfn_t gfn) +{ + /* + * Use READ_ONCE() to ensure the in-progress flag and sequence counter + * are always read from memory, e.g. so that checking for retry in a + * loop won't result in an infinite retry loop. Don't force loads for + * start+end, as the key to avoiding infinite retry loops is observing + * the 1=>0 transition of in-progress, i.e. getting false negatives + * due to stale start+end values is acceptable. + */ + if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && + gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return true; + + return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; +} #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -1980,12 +2113,10 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); -#ifdef CONFIG_HAVE_KVM_EVENTFD - void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); bool kvm_notify_irqfd_resampler(struct kvm *kvm, @@ -2006,31 +2137,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, { return false; } -#endif - -#else - -static inline void kvm_eventfd_init(struct kvm *kvm) {} - -static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) -{ - return -EINVAL; -} - -static inline void kvm_irqfd_release(struct kvm *kvm) {} - -#ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm) -{ -} -#endif - -static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - return -ENOSYS; -} - -#endif /* CONFIG_HAVE_KVM_EVENTFD */ +#endif /* CONFIG_HAVE_KVM_IRQCHIP */ void kvm_arch_irq_routing_update(struct kvm *kvm); @@ -2148,8 +2255,6 @@ struct kvm_device_ops { int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; -void kvm_device_get(struct kvm_device *dev); -void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); @@ -2287,4 +2392,57 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 +static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + gpa_t gpa, gpa_t size, + bool is_write, bool is_exec, + bool is_private) +{ + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; + vcpu->run->memory_fault.gpa = gpa; + vcpu->run->memory_fault.size = size; + + /* RWX flags are not (yet) defined or communicated to userspace. */ + vcpu->run->memory_fault.flags = 0; + if (is_private) + vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; +} + +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) +{ + return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); +} + +bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long attrs); +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); +bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); + +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; +} +#else +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return false; +} +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + +#ifdef CONFIG_KVM_PRIVATE_MEM +int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, int *max_order); +#else +static inline int kvm_gmem_get_pfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + kvm_pfn_t *pfn, int *max_order) +{ + KVM_BUG_ON(1, kvm); + return -EIO; +} +#endif /* CONFIG_KVM_PRIVATE_MEM */ + #endif diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 6f4737d5046a..d93f6522b2c3 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -6,6 +6,7 @@ struct kvm; struct kvm_async_pf; struct kvm_device_ops; +struct kvm_gfn_range; struct kvm_interrupt; struct kvm_irq_routing_table; struct kvm_memory_slot; @@ -48,12 +49,6 @@ typedef u64 hfn_t; typedef hfn_t kvm_pfn_t; -enum pfn_cache_usage { - KVM_GUEST_USES_PFN = BIT(0), - KVM_HOST_USES_PFN = BIT(1), - KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN, -}; - struct gfn_to_hva_cache { u64 generation; gpa_t gpa; @@ -68,13 +63,11 @@ struct gfn_to_pfn_cache { unsigned long uhva; struct kvm_memory_slot *memslot; struct kvm *kvm; - struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; struct mutex refresh_lock; void *khva; kvm_pfn_t pfn; - enum pfn_cache_usage usage; bool active; bool valid; }; diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 612b4cab3819..36df927ec4b7 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -85,7 +85,6 @@ static inline struct led_classdev_flash *lcdev_to_flcdev( return container_of(lcdev, struct led_classdev_flash, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) /** * led_classdev_flash_register_ext - register a new object of LED class with * init data and with support for flash LEDs @@ -116,29 +115,6 @@ int devm_led_classdev_flash_register_ext(struct device *parent, void devm_led_classdev_flash_unregister(struct device *parent, struct led_classdev_flash *fled_cdev); -#else - -static inline int led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {}; -static inline int devm_led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_flash_unregister(struct device *parent, - struct led_classdev_flash *fled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */ - static inline int led_classdev_flash_register(struct device *parent, struct led_classdev_flash *fled_cdev) { diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h index 210d57bcd767..db9f34c6736e 100644 --- a/include/linux/led-class-multicolor.h +++ b/include/linux/led-class-multicolor.h @@ -30,7 +30,6 @@ static inline struct led_classdev_mc *lcdev_to_mccdev( return container_of(led_cdev, struct led_classdev_mc, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) /** * led_classdev_multicolor_register_ext - register a new object of led_classdev * class with support for multicolor LEDs @@ -64,34 +63,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent, void devm_led_classdev_multicolor_unregister(struct device *parent, struct led_classdev_mc *mcled_cdev); -#else - -static inline int led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {}; -static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev, - enum led_brightness brightness) -{ - return 0; -} - -static inline int devm_led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_multicolor_unregister(struct device *parent, - struct led_classdev_mc *mcled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */ static inline int led_classdev_multicolor_register(struct device *parent, struct led_classdev_mc *mcled_cdev) diff --git a/include/linux/leds-expresswire.h b/include/linux/leds-expresswire.h new file mode 100644 index 000000000000..a422921f4159 --- /dev/null +++ b/include/linux/leds-expresswire.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Shared library for Kinetic's ExpressWire protocol. + * This protocol works by pulsing the ExpressWire IC's control GPIO. + * ktd2692 and ktd2801 are known to use this protocol. + */ + +#ifndef _LEDS_EXPRESSWIRE_H +#define _LEDS_EXPRESSWIRE_H + +#include <linux/types.h> + +struct gpio_desc; + +struct expresswire_timing { + unsigned long poweroff_us; + unsigned long detect_delay_us; + unsigned long detect_us; + unsigned long data_start_us; + unsigned long end_of_data_low_us; + unsigned long end_of_data_high_us; + unsigned long short_bitset_us; + unsigned long long_bitset_us; +}; + +struct expresswire_common_props { + struct gpio_desc *ctrl_gpio; + struct expresswire_timing timing; +}; + +void expresswire_power_off(struct expresswire_common_props *props); +void expresswire_enable(struct expresswire_common_props *props); +void expresswire_start(struct expresswire_common_props *props); +void expresswire_end(struct expresswire_common_props *props); +void expresswire_set_bit(struct expresswire_common_props *props, bool bit); +void expresswire_write_u8(struct expresswire_common_props *props, u8 val); + +#endif /* _LEDS_EXPRESSWIRE_H */ diff --git a/include/linux/leds.h b/include/linux/leds.h index 7d428100b42b..db6b114bb3d9 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,15 +82,7 @@ struct led_init_data { bool devname_mandatory; }; -#if IS_ENABLED(CONFIG_NEW_LEDS) enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); -#else -static inline enum led_default_state -led_init_default_state_get(struct fwnode_handle *fwnode) -{ - return LEDS_DEFSTATE_OFF; -} -#endif struct led_hw_trigger_type { int dummy; @@ -100,6 +92,7 @@ struct led_classdev { const char *name; unsigned int brightness; unsigned int max_brightness; + unsigned int color; int flags; /* Lower 16 bits reflect status */ @@ -278,20 +271,9 @@ static inline int led_classdev_register(struct device *parent, return led_classdev_register_ext(parent, led_cdev, NULL); } -#if IS_ENABLED(CONFIG_LEDS_CLASS) int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); -#else -static inline int -devm_led_classdev_register_ext(struct device *parent, - struct led_classdev *led_cdev, - struct led_init_data *init_data) -{ - return 0; -} -#endif - static inline int devm_led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { @@ -313,6 +295,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index); +struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, + int index); /** * led_blink_set - set blinking with software fallback @@ -524,23 +508,6 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return led_cdev->trigger_data; } -/** - * led_trigger_rename_static - rename a trigger - * @name: the new trigger name - * @trig: the LED trigger to rename - * - * Change a LED trigger name by copying the string passed in - * name into current trigger name, which MUST be large - * enough for the new string. - * - * Note that name must NOT point to the same string used - * during LED registration, as that could lead to races. - * - * This is meant to be used on triggers with statically - * allocated name. - */ -void led_trigger_rename_static(const char *name, struct led_trigger *trig); - #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ led_trigger_unregister) @@ -585,6 +552,9 @@ enum led_trigger_netdev_modes { TRIGGER_NETDEV_LINK_10, TRIGGER_NETDEV_LINK_100, TRIGGER_NETDEV_LINK_1000, + TRIGGER_NETDEV_LINK_2500, + TRIGGER_NETDEV_LINK_5000, + TRIGGER_NETDEV_LINK_10000, TRIGGER_NETDEV_HALF_DUPLEX, TRIGGER_NETDEV_FULL_DUPLEX, TRIGGER_NETDEV_TX, @@ -669,7 +639,7 @@ struct gpio_led_platform_data { gpio_blink_set_t gpio_blink_set; }; -#ifdef CONFIG_NEW_LEDS +#ifdef CONFIG_LEDS_GPIO_REGISTER struct platform_device *gpio_led_register_device( int id, const struct gpio_led_platform_data *pdata); #else diff --git a/include/linux/libata.h b/include/linux/libata.h index 820f7a3a2749..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ @@ -192,6 +193,7 @@ enum { ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ + ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ @@ -222,6 +224,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ @@ -255,7 +261,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that @@ -314,9 +320,10 @@ enum { ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ + ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | - ATA_EH_GET_SUCCESS_SENSE, + ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -344,7 +351,6 @@ enum { ATA_LINK_RESUME_TRIES = 5, /* how hard are we gonna try to probe/recover devices */ - ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, ATA_EH_PMP_TRIES = 5, ATA_EH_PMP_LINK_TRIES = 3, @@ -354,7 +360,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -466,7 +472,7 @@ enum ata_completion_errors { /* * Link power management policy: If you alter this, you also need to - * alter libata-scsi.c (for the ascii descriptions) + * alter libata-sata.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, @@ -651,7 +657,7 @@ struct ata_cpr { struct ata_cpr_log { u8 nr_cpr; - struct ata_cpr cpr[]; + struct ata_cpr cpr[] __counted_by(nr_cpr); }; struct ata_device { @@ -977,12 +983,6 @@ struct ata_port_operations { ssize_t size); /* - * Obsolete - */ - void (*phy_reset)(struct ata_port *ap); - void (*eng_timeout)(struct ata_port *ap); - - /* * ->inherits must be the last field and all the preceding * fields must be pointers. */ @@ -1116,7 +1116,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap) extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, - u32 val, unsigned long interval, unsigned long timeout); + u32 val, unsigned int interval, unsigned int timeout); extern int atapi_cmd_type(u8 opcode); extern unsigned int ata_pack_xfermask(unsigned int pio_mask, unsigned int mwdma_mask, @@ -1151,6 +1151,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); +extern int ata_scsi_slave_alloc(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, @@ -1166,11 +1167,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST -extern const unsigned long sata_deb_timing_normal[]; -extern const unsigned long sata_deb_timing_hotplug[]; -extern const unsigned long sata_deb_timing_long[]; +extern const unsigned int sata_deb_timing_normal[]; +extern const unsigned int sata_deb_timing_hotplug[]; +extern const unsigned int sata_deb_timing_long[]; -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { if (ehc->i.flags & ATA_EHI_HOTPLUGGED) @@ -1185,14 +1186,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); extern int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, unsigned long deadline, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); -extern int sata_link_resume(struct ata_link *link, const unsigned long *params, +extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { return NULL; @@ -1212,7 +1213,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } static inline int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) @@ -1222,7 +1223,7 @@ static inline int sata_link_hardreset(struct ata_link *link, return -EOPNOTSUPP; } static inline int sata_link_resume(struct ata_link *link, - const unsigned long *params, + const unsigned int *params, unsigned long deadline) { return -EOPNOTSUPP; @@ -1234,20 +1235,15 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, - const unsigned long *params, unsigned long deadline); + const unsigned int *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -extern void ata_sas_async_probe(struct ata_port *ap); -extern int ata_sas_sync_probe(struct ata_port *ap); -extern int ata_sas_port_init(struct ata_port *); -extern int ata_sas_port_start(struct ata_port *ap); +extern void ata_port_probe(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, @@ -1404,6 +1400,7 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ + .slave_alloc = ata_scsi_slave_alloc, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ @@ -1565,6 +1562,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...); extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif +static inline void ata_port_desc_misc(struct ata_port *ap, int irq) +{ + ata_port_desc(ap, "irq %d", irq); + ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); +} static inline bool ata_tag_internal(unsigned int tag) { @@ -1785,7 +1787,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (unlikely(!qc) || !ap->ops->error_handler) + if (unlikely(!qc)) return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | @@ -1876,7 +1878,7 @@ static inline int ata_check_ready(u8 status) } static inline unsigned long ata_deadline(unsigned long from_jiffies, - unsigned long timeout_msecs) + unsigned int timeout_msecs) { return from_jiffies + msecs_to_jiffies(timeout_msecs); } @@ -1885,23 +1887,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies, change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ -static inline int ata_using_mwdma(struct ata_device *adev) +static inline bool ata_using_mwdma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) - return 1; - return 0; + return adev->dma_mode >= XFER_MW_DMA_0 && + adev->dma_mode <= XFER_MW_DMA_4; } -static inline int ata_using_udma(struct ata_device *adev) +static inline bool ata_using_udma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) - return 1; - return 0; + return adev->dma_mode >= XFER_UDMA_0 && + adev->dma_mode <= XFER_UDMA_7; } -static inline int ata_dma_enabled(struct ata_device *adev) +static inline bool ata_dma_enabled(struct ata_device *adev) { - return (adev->dma_mode == 0xFF ? 0 : 1); + return adev->dma_mode != 0xFF; } /************************************************************************** diff --git a/include/linux/limits.h b/include/linux/limits.h index f6bcc9369010..38eb7f6f7e88 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -10,6 +10,8 @@ #define SSIZE_MAX ((ssize_t)(SIZE_MAX >> 1)) #define PHYS_ADDR_MAX (~(phys_addr_t)0) +#define RESOURCE_SIZE_MAX ((resource_size_t)~0) + #define U8_MAX ((u8)~0U) #define S8_MAX ((s8)(U8_MAX >> 1)) #define S8_MIN ((s8)(-S8_MAX - 1)) diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 15e0e0209da4..287f590ed56b 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst) bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline void linkmode_fill(unsigned long *dst) +{ + bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) { bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -43,15 +48,6 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) __set_bit(nr, addr); } -static inline void linkmode_set_bit_array(const int *array, int array_size, - unsigned long *addr) -{ - int i; - - for (i = 0; i < array_size; i++) - linkmode_set_bit(array[i], addr); -} - static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); @@ -71,6 +67,15 @@ static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) return test_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline int linkmode_equal(const unsigned long *src1, const unsigned long *src2) { diff --git a/include/linux/list.h b/include/linux/list.h index f10344dbad4d..5f4b0a39cf46 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -38,11 +38,92 @@ static inline void INIT_LIST_HEAD(struct list_head *list) WRITE_ONCE(list->prev, list); } +#ifdef CONFIG_LIST_HARDENED + #ifdef CONFIG_DEBUG_LIST -extern bool __list_add_valid(struct list_head *new, - struct list_head *prev, - struct list_head *next); -extern bool __list_del_entry_valid(struct list_head *entry); +# define __list_valid_slowpath +#else +# define __list_valid_slowpath __cold __preserve_most +#endif + +/* + * Performs the full set of list corruption checks before __list_add(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); + +/* + * Performs list corruption checks before __list_add(). Returns false if a + * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_add_valid_or_report(). + */ +static __always_inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + /* + * With the hardening version, elide checking if next and prev + * are NULL, since the immediate dereference of them below would + * result in a fault if NULL. + * + * With the reduced set of checks, we can afford to inline the + * checks, which also gives the compiler a chance to elide some + * of them completely if they can be proven at compile-time. If + * one of the pre-conditions does not hold, the slow-path will + * show a report which pre-condition failed. + */ + if (likely(next->prev == prev && prev->next == next && new != prev && new != next)) + return true; + ret = false; + } + + ret &= __list_add_valid_or_report(new, prev, next); + return ret; +} + +/* + * Performs the full set of list corruption checks before __list_del_entry(). + * On list corruption reports a warning, and returns false. + */ +extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); + +/* + * Performs list corruption checks before __list_del_entry(). Returns false if a + * corruption is detected, true otherwise. + * + * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking + * inline to catch non-faulting corruptions, and only if a corruption is + * detected calls the reporting function __list_del_entry_valid_or_report(). + */ +static __always_inline bool __list_del_entry_valid(struct list_head *entry) +{ + bool ret = true; + + if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { + struct list_head *prev = entry->prev; + struct list_head *next = entry->next; + + /* + * With the hardening version, elide checking if next and prev + * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate + * dereference of them below would result in a fault. + */ + if (likely(prev->next == entry && next->prev == entry)) + return true; + ret = false; + } + + ret &= __list_del_entry_valid_or_report(entry); + return ret; +} #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, @@ -606,6 +687,14 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** + * list_for_each_reverse - iterate backwards over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_reverse(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. @@ -677,7 +766,7 @@ static inline size_t list_count_nodes(struct list_head *head) * @member: the name of the list_head within the struct. */ #define list_entry_is_head(pos, head, member) \ - (&pos->member == (head)) + list_is_head(&pos->member, (head)) /** * list_for_each_entry - iterate over list of given type @@ -1030,6 +1119,26 @@ static inline void hlist_move_list(struct hlist_head *old, old->first = NULL; } +/** + * hlist_splice_init() - move all entries from one list to another + * @from: hlist_head from which entries will be moved + * @last: last entry on the @from list + * @to: hlist_head to which entries will be moved + * + * @to can be empty, @from must contain at least @last. + */ +static inline void hlist_splice_init(struct hlist_head *from, + struct hlist_node *last, + struct hlist_head *to) +{ + if (to->first) + to->first->pprev = &last->next; + last->next = to->first; + to->first = from->first; + from->first->pprev = &to->first; + from->first = NULL; +} + #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -1086,4 +1195,19 @@ static inline void hlist_move_list(struct hlist_head *old, pos && ({ n = pos->member.next; 1; }); \ pos = hlist_entry_safe(n, typeof(*pos), member)) +/** + * hlist_count_nodes - count nodes in the hlist + * @head: the head for your hlist. + */ +static inline size_t hlist_count_nodes(struct hlist_head *head) +{ + struct hlist_node *pos; + size_t count = 0; + + hlist_for_each(pos, head) + count++; + + return count; +} + #endif diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index b35968ee9fb5..792b67ceb631 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -24,6 +24,8 @@ enum lru_status { LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ + LRU_STOP, /* stop lru list walking. May drop the lock + internally, but has to return locked. */ }; struct list_lru_one { @@ -62,8 +64,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, #define list_lru_init(lru) \ __list_lru_init((lru), false, NULL, NULL) -#define list_lru_init_key(lru, key) \ - __list_lru_init((lru), false, (key), NULL) #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) @@ -73,8 +73,10 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren /** * list_lru_add: add an element to the lru list's tail - * @list_lru: the lru pointer + * @lru: the lru pointer * @item: the item to be added. + * @nid: the node id of the sublist to add the item to. + * @memcg: the cgroup of the sublist to add the item to. * * If the element is already part of a list, this function returns doing * nothing. Therefore the caller does not need to keep state about whether or @@ -83,24 +85,54 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren * the caller organize itself in a way that elements can be in more than * one type of list, it is up to the caller to fully remove the item from * the previous list (with list_lru_del() for instance) before moving it - * to @list_lru + * to @lru. + * + * Return: true if the list was updated, false otherwise + */ +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, + struct mem_cgroup *memcg); + +/** + * list_lru_add_obj: add an element to the lru list's tail + * @lru: the lru pointer + * @item: the item to be added. + * + * This function is similar to list_lru_add(), but the NUMA node and the + * memcg of the sublist is determined by @item list_head. This assumption is + * valid for slab objects LRU such as dentries, inodes, etc. * * Return value: true if the list was updated, false otherwise */ -bool list_lru_add(struct list_lru *lru, struct list_head *item); +bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); /** - * list_lru_del: delete an element to the lru list - * @list_lru: the lru pointer + * list_lru_del: delete an element from the lru list + * @lru: the lru pointer * @item: the item to be deleted. + * @nid: the node id of the sublist to delete the item from. + * @memcg: the cgroup of the sublist to delete the item from. * - * This function works analogously as list_lru_add in terms of list + * This function works analogously as list_lru_add() in terms of list * manipulation. The comments about an element already pertaining to - * a list are also valid for list_lru_del. + * a list are also valid for list_lru_del(). * - * Return value: true if the list was updated, false otherwise + * Return: true if the list was updated, false otherwise */ -bool list_lru_del(struct list_lru *lru, struct list_head *item); +bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, + struct mem_cgroup *memcg); + +/** + * list_lru_del_obj: delete an element from the lru list + * @lru: the lru pointer + * @item: the item to be deleted. + * + * This function is similar to list_lru_del(), but the NUMA node and the + * memcg of the sublist is determined by @item list_head. This assumption is + * valid for slab objects LRU such as dentries, inodes, etc. + * + * Return value: true if the list was updated, false otherwise. + */ +bool list_lru_del_obj(struct list_lru *lru, struct list_head *item); /** * list_lru_count_one: return the number of objects currently held by @lru @@ -108,9 +140,11 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item); * @nid: the node id to count from. * @memcg: the cgroup to count from. * - * Always return a non-negative number, 0 for empty lists. There is no - * guarantee that the list is not updated while the count is being computed. - * Callers that want such a guarantee need to provide an outer lock. + * There is no guarantee that the list is not updated while the count is being + * computed. Callers that want such a guarantee need to provide an outer lock. + * + * Return: 0 for empty lists, otherwise the number of objects + * currently held by @lru. */ unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg); @@ -141,7 +175,7 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, struct list_lru_one *list, spinlock_t *lock, void *cb_arg); /** - * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items. + * list_lru_walk_one: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. @@ -150,24 +184,24 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * - * This function will scan all elements in a particular list_lru, calling the + * This function will scan all elements in a particular @lru, calling the * @isolate callback for each of those items, along with the current list * spinlock and a caller-provided opaque. The @isolate callback can choose to * drop the lock internally, but *must* return with the lock held. The callback - * will return an enum lru_status telling the list_lru infrastructure what to + * will return an enum lru_status telling the @lru infrastructure what to * do with the object being scanned. * - * Please note that nr_to_walk does not mean how many objects will be freed, + * Please note that @nr_to_walk does not mean how many objects will be freed, * just how many objects will be scanned. * - * Return value: the number of objects effectively removed from the LRU. + * Return: the number of objects effectively removed from the LRU. */ unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); /** - * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items. + * list_lru_walk_one_irq: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. @@ -176,7 +210,7 @@ unsigned long list_lru_walk_one(struct list_lru *lru, * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * - * Same as @list_lru_walk_one except that the spinlock is acquired with + * Same as list_lru_walk_one() except that the spinlock is acquired with * spin_lock_irq(). */ unsigned long list_lru_walk_one_irq(struct list_lru *lru, diff --git a/include/linux/llist.h b/include/linux/llist.h index 85bda2d02d65..2c982ff7475a 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -74,6 +74,33 @@ static inline void init_llist_head(struct llist_head *list) } /** + * init_llist_node - initialize lock-less list node + * @node: the node to be initialised + * + * In cases where there is a need to test if a node is on + * a list or not, this initialises the node to clearly + * not be on any list. + */ +static inline void init_llist_node(struct llist_node *node) +{ + node->next = node; +} + +/** + * llist_on_list - test if a lock-list list node is on a list + * @node: the node to test + * + * When a node is on a list the ->next pointer will be NULL or + * some other node. It can never point to itself. We use that + * in init_llist_node() to record that a node is not on any list, + * and here to test whether it is on any list. + */ +static inline bool llist_on_list(const struct llist_node *node) +{ + return node->next != node; +} + +/** * llist_entry - get the struct of this entry * @ptr: the &struct llist_node pointer. * @type: the type of the struct this is embedded in. @@ -249,6 +276,25 @@ static inline struct llist_node *__llist_del_all(struct llist_head *head) extern struct llist_node *llist_del_first(struct llist_head *head); +/** + * llist_del_first_init - delete first entry from lock-list and mark is as being off-list + * @head: the head of lock-less list to delete from. + * + * This behave the same as llist_del_first() except that llist_init_node() is called + * on the returned node so that llist_on_list() will report false for the node. + */ +static inline struct llist_node *llist_del_first_init(struct llist_head *head) +{ + struct llist_node *n = llist_del_first(head); + + if (n) + init_llist_node(n); + return n; +} + +extern bool llist_del_first_this(struct llist_head *head, + struct llist_node *this); + struct llist_node *llist_reverse_order(struct llist_node *head); #endif /* LLIST_H */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f42594a9efe0..1b95fe31051f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; +extern struct timer_list nlmsvc_retry; + /* * Lockd client functions */ @@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -unsigned long nlmsvc_retry_blocked(void); +void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); @@ -373,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) - && fl1->fl_pid == fl2->fl_pid - && fl1->fl_owner == fl2->fl_owner + return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) + && fl1->c.flc_pid == fl2->c.flc_pid + && fl1->c.flc_owner == fl2->c.flc_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end - &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK); + &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); } extern const struct lock_manager_operations nlmsvc_lock_operations; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index b60fbcd8cdfa..80cca9426761 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -52,7 +52,7 @@ struct nlm_lock { * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to * 32 bytes. */ - + struct nlm_cookie { unsigned char data[NLM_MAXCOOKIELEN]; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 310f85903c91..08b0d1d9d78b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -82,63 +82,6 @@ struct lock_chain { u64 chain_key; }; -#define MAX_LOCKDEP_KEYS_BITS 13 -#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) -#define INITIAL_CHAIN_KEY -1 - -struct held_lock { - /* - * One-way hash of the dependency chain up to this point. We - * hash the hashes step by step as the dependency chain grows. - * - * We use it for dependency-caching and we skip detection - * passes and dependency-updates if there is a cache-hit, so - * it is absolutely critical for 100% coverage of the validator - * to have a unique key value for every unique dependency path - * that can occur in the system, to make a unique hash value - * as likely as possible - hence the 64-bit width. - * - * The task struct holds the current hash value (initialized - * with zero), here we store the previous hash value: - */ - u64 prev_chain_key; - unsigned long acquire_ip; - struct lockdep_map *instance; - struct lockdep_map *nest_lock; -#ifdef CONFIG_LOCK_STAT - u64 waittime_stamp; - u64 holdtime_stamp; -#endif - /* - * class_idx is zero-indexed; it points to the element in - * lock_classes this held lock instance belongs to. class_idx is in - * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive. - */ - unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; - /* - * The lock-stack is unified in that the lock chains of interrupt - * contexts nest ontop of process context chains, but we 'separate' - * the hashes by starting with 0 if we cross into an interrupt - * context, and we also keep do not add cross-context lock - * dependencies - the lock usage graph walking covers that area - * anyway, and we'd just unnecessarily increase the number of - * dependencies otherwise. [Note: hardirq and softirq contexts - * are separated from each other too.] - * - * The following field is used to detect when we cross into an - * interrupt context: - */ - unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; /* 16 bits */ - - unsigned int read:2; /* see lock_acquire() comment */ - unsigned int check:1; /* see lock_acquire() comment */ - unsigned int hardirqs_off:1; - unsigned int sync:1; - unsigned int references:11; /* 32 bits */ - unsigned int pin_count; -}; - /* * Initialization, self-test and debugging-output methods: */ @@ -625,6 +568,12 @@ do { \ WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \ } while (0) +#define lockdep_assert_no_hardirq() \ +do { \ + WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \ + !this_cpu_read(hardirqs_enabled))); \ +} while (0) + #define lockdep_assert_preemption_enabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ @@ -659,6 +608,7 @@ do { \ # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) +# define lockdep_assert_no_hardirq() do { } while (0) # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 2ebc323d345a..70d30d40ea4a 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -127,12 +127,12 @@ struct lock_class { unsigned long usage_mask; const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; + const char *name; /* * Generation counter, when doing certain classes of graph walking, * to ensure that we check one node only once: */ int name_version; - const char *name; u8 wait_type_inner; u8 wait_type_outer; @@ -198,6 +198,63 @@ struct lockdep_map { struct pin_cookie { unsigned int val; }; +#define MAX_LOCKDEP_KEYS_BITS 13 +#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) +#define INITIAL_CHAIN_KEY -1 + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. + * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + unsigned long acquire_ip; + struct lockdep_map *instance; + struct lockdep_map *nest_lock; +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif + /* + * class_idx is zero-indexed; it points to the element in + * lock_classes this held lock instance belongs to. class_idx is in + * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive. + */ + unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS; + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest ontop of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also keep do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.] + * + * The following field is used to detect when we cross into an + * interrupt context: + */ + unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ + unsigned int trylock:1; /* 16 bits */ + + unsigned int read:2; /* see lock_acquire() comment */ + unsigned int check:1; /* see lock_acquire() comment */ + unsigned int hardirqs_off:1; + unsigned int sync:1; + unsigned int references:11; /* 32 bits */ + unsigned int pin_count; +}; + #else /* !CONFIG_LOCKDEP */ /* diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 54945aa824b4..babf4e3c28ba 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -39,9 +39,6 @@ struct logic_pio_host_ops { #ifdef CONFIG_INDIRECT_PIO u8 logic_inb(unsigned long addr); -void logic_outb(u8 value, unsigned long addr); -void logic_outw(u16 value, unsigned long addr); -void logic_outl(u32 value, unsigned long addr); u16 logic_inw(unsigned long addr); u32 logic_inl(unsigned long addr); void logic_outb(u8 value, unsigned long addr); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 7308a1a7599b..334e00efbde4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -32,28 +32,29 @@ LSM_HOOK(int, 0, binder_transaction, const struct cred *from, LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, const struct cred *to) LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, - const struct cred *to, struct file *file) + const struct cred *to, const struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) -LSM_HOOK(int, 0, capget, struct task_struct *target, kernel_cap_t *effective, +LSM_HOOK(int, 0, capget, const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) -LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) -LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) +LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) +LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, @@ -65,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) -LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, @@ -93,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) +LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap, + struct dentry *dentry) LSM_HOOK(int, 0, path_truncate, const struct path *path) LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, const char *old_name) @@ -111,13 +114,15 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) -LSM_HOOK(int, 0, inode_init_security, struct inode *inode, - struct inode *dir, const struct qstr *qstr, const char **name, - void **value, size_t *len) +LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, + struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, + int *xattr_count) LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, umode_t mode) +LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap, + struct inode *inode) LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) @@ -134,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, bool rcu) LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) -LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, + struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, @@ -145,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, + const char *name) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl) LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) @@ -167,9 +181,12 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) +LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, + unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) @@ -183,6 +200,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) @@ -261,6 +279,10 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) +LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr, + struct lsm_ctx __user *ctx, u32 *size, u32 flags) +LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr, + struct lsm_ctx *ctx, u32 size, u32 flags) LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) @@ -272,7 +294,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -308,15 +330,15 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, struct sock *newsk) -LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, const struct sock *sk, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -383,6 +405,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) +LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, + struct key *key, const void *payload, size_t payload_len, + unsigned long flags, bool create) #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -397,10 +422,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, + struct path *path) +LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) +LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) +LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ab2b2fafa4a4..a2ade0ffe9e7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -25,9 +25,11 @@ #ifndef __LINUX_LSM_HOOKS_H #define __LINUX_LSM_HOOKS_H +#include <uapi/linux/lsm.h> #include <linux/security.h> #include <linux/init.h> #include <linux/rculist.h> +#include <linux/xattr.h> union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); @@ -41,6 +43,18 @@ struct security_hook_heads { #undef LSM_HOOK } __randomize_layout; +/** + * struct lsm_id - Identify a Linux Security Module. + * @lsm: name of the LSM, must be approved by the LSM maintainers + * @id: LSM ID number from uapi/linux/lsm.h + * + * Contains the information that identifies the LSM. + */ +struct lsm_id { + const char *name; + u64 id; +}; + /* * Security module hook list structure. * For use with generic list macros for common operations. @@ -49,7 +63,7 @@ struct security_hook_list { struct hlist_node list; struct hlist_head *head; union security_list_options hook; - const char *lsm; + const struct lsm_id *lsmid; } __randomize_layout; /* @@ -63,8 +77,27 @@ struct lsm_blob_sizes { int lbs_ipc; int lbs_msg_msg; int lbs_task; + int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ }; +/** + * lsm_get_xattr_slot - Return the next available slot and increment the index + * @xattrs: array storing LSM-provided xattrs + * @xattr_count: number of already stored xattrs (updated) + * + * Retrieve the first available slot in the @xattrs array to fill with an xattr, + * and increment @xattr_count. + * + * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. + */ +static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, + int *xattr_count) +{ + if (unlikely(!xattrs)) + return NULL; + return &xattrs[(*xattr_count)++]; +} + /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). @@ -84,7 +117,7 @@ extern struct security_hook_heads security_hook_heads; extern char *lsm_names; extern void security_add_hooks(struct security_hook_list *hooks, int count, - const char *lsm); + const struct lsm_id *lsmid); #define LSM_FLAG_LEGACY_MAJOR BIT(0) #define LSM_FLAG_EXCLUSIVE BIT(1) diff --git a/include/linux/lwq.h b/include/linux/lwq.h new file mode 100644 index 000000000000..d081d5cf8e33 --- /dev/null +++ b/include/linux/lwq.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LWQ_H +#define LWQ_H +/* + * Light-weight single-linked queue built from llist + * + * Entries can be enqueued from any context with no locking. + * Entries can be dequeued from process context with integrated locking. + * + * This is particularly suitable when work items are queued in + * BH or IRQ context, and where work items are handled one at a time + * by dedicated threads. + */ +#include <linux/container_of.h> +#include <linux/spinlock.h> +#include <linux/llist.h> + +struct lwq_node { + struct llist_node node; +}; + +struct lwq { + spinlock_t lock; + struct llist_node *ready; /* entries to be dequeued */ + struct llist_head new; /* entries being enqueued */ +}; + +/** + * lwq_init - initialise a lwq + * @q: the lwq object + */ +static inline void lwq_init(struct lwq *q) +{ + spin_lock_init(&q->lock); + q->ready = NULL; + init_llist_head(&q->new); +} + +/** + * lwq_empty - test if lwq contains any entry + * @q: the lwq object + * + * This empty test contains an acquire barrier so that if a wakeup + * is sent when lwq_dequeue returns true, it is safe to go to sleep after + * a test on lwq_empty(). + */ +static inline bool lwq_empty(struct lwq *q) +{ + /* acquire ensures ordering wrt lwq_enqueue() */ + return smp_load_acquire(&q->ready) == NULL && llist_empty(&q->new); +} + +struct llist_node *__lwq_dequeue(struct lwq *q); +/** + * lwq_dequeue - dequeue first (oldest) entry from lwq + * @q: the queue to dequeue from + * @type: the type of object to return + * @member: them member in returned object which is an lwq_node. + * + * Remove a single object from the lwq and return it. This will take + * a spinlock and so must always be called in the same context, typcially + * process contet. + */ +#define lwq_dequeue(q, type, member) \ + ({ struct llist_node *_n = __lwq_dequeue(q); \ + _n ? container_of(_n, type, member.node) : NULL; }) + +struct llist_node *lwq_dequeue_all(struct lwq *q); + +/** + * lwq_for_each_safe - iterate over detached queue allowing deletion + * @_n: iterator variable + * @_t1: temporary struct llist_node ** + * @_t2: temporary struct llist_node * + * @_l: address of llist_node pointer from lwq_dequeue_all() + * @_member: member in _n where lwq_node is found. + * + * Iterate over members in a dequeued list. If the iterator variable + * is set to NULL, the iterator removes that entry from the queue. + */ +#define lwq_for_each_safe(_n, _t1, _t2, _l, _member) \ + for (_t1 = (_l); \ + *(_t1) ? (_n = container_of(*(_t1), typeof(*(_n)), _member.node),\ + _t2 = ((*_t1)->next), \ + true) \ + : false; \ + (_n) ? (_t1 = &(_n)->_member.node.next, 0) \ + : ((*(_t1) = (_t2)), 0)) + +/** + * lwq_enqueue - add a new item to the end of the queue + * @n - the lwq_node embedded in the item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue(struct lwq_node *n, struct lwq *q) +{ + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add(&n->node, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} + +/** + * lwq_enqueue_batch - add a list of new items to the end of the queue + * @n - the lwq_node embedded in the first item to be added + * @q - the lwq to append to. + * + * No locking is needed to append to the queue so this can + * be called from any context. + * Return %true is the list may have previously been empty. + */ +static inline bool lwq_enqueue_batch(struct llist_node *n, struct lwq *q) +{ + struct llist_node *e = n; + + /* acquire enqures ordering wrt lwq_dequeue */ + return llist_add_batch(llist_reverse_order(n), e, &q->new) && + smp_load_acquire(&q->ready) == NULL; +} +#endif /* LWQ_H */ diff --git a/include/linux/maple.h b/include/linux/maple.h index 9b140272ee16..9aae44efcfd4 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -5,7 +5,6 @@ #include <mach/maple.h> struct device; -extern struct bus_type maple_bus_type; /* Maple Bus command and response codes */ enum maple_code { diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 295548cca8b3..a53ad4dabd7e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -29,14 +29,12 @@ #define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ -#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) #else /* 32bit sizes */ #define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ -#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) #endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ @@ -173,6 +171,7 @@ enum maple_type { #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 +#define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 @@ -184,13 +183,23 @@ enum maple_type { #ifdef CONFIG_LOCKDEP typedef struct lockdep_map *lockdep_map_p; -#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock) +#define mt_lock_is_held(mt) \ + (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) + +#define mt_write_lock_is_held(mt) \ + (!(mt)->ma_external_lock || \ + lock_is_held_type((mt)->ma_external_lock, 0)) + #define mt_set_external_lock(mt, lock) \ (mt)->ma_external_lock = &(lock)->dep_map + +#define mt_on_stack(mt) (mt).ma_external_lock = NULL #else typedef struct { /* nothing */ } lockdep_map_p; -#define mt_lock_is_held(mt) 1 +#define mt_lock_is_held(mt) 1 +#define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) +#define mt_on_stack(mt) do { } while (0) #endif /* @@ -212,8 +221,8 @@ struct maple_tree { spinlock_t ma_lock; lockdep_map_p ma_external_lock; }; - void __rcu *ma_root; unsigned int ma_flags; + void __rcu *ma_root; }; /** @@ -248,6 +257,8 @@ struct maple_tree { struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_lock_nested(mas, subclass) \ + spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* @@ -309,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); @@ -319,6 +333,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); +int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); + void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); @@ -337,6 +354,36 @@ static inline bool mtree_empty(const struct maple_tree *mt) /* Advanced API */ /* + * Maple State Status + * ma_active means the maple state is pointing to a node and offset and can + * continue operating on the tree. + * ma_start means we have not searched the tree. + * ma_root means we have searched the tree and the entry we found lives in + * the root of the tree (ie it has index 0, length 1 and is the only entry in + * the tree). + * ma_none means we have searched the tree and there is no node in the + * tree for this entry. For example, we searched for index 1 in an empty + * tree. Or we have a tree which points to a full leaf node and we + * searched for an entry which is larger than can be contained in that + * leaf node. + * ma_pause means the data within the maple state may be stale, restart the + * operation + * ma_overflow means the search has reached the upper limit of the search + * ma_underflow means the search has reached the lower limit of the search + * ma_error means there was an error, check the node for the error number. + */ +enum maple_status { + ma_active, + ma_start, + ma_root, + ma_none, + ma_pause, + ma_overflow, + ma_underflow, + ma_error, +}; + +/* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the * advanced API. @@ -368,6 +415,13 @@ static inline bool mtree_empty(const struct maple_tree *mt) * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. + * + * The status of the state is used to determine how the next action should treat + * the state. For instance, if the status is ma_start then the next action + * should start at the root of the tree and walk down. If the status is + * ma_pause then the node may be stale data and should be discarded. If the + * status is ma_overflow, then the last action hit the upper limit. + * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ @@ -377,9 +431,11 @@ struct ma_state { unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct maple_alloc *alloc; /* Allocated nodes for this operation */ + enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; + unsigned char end; /* The end of the node */ }; struct ma_wr_state { @@ -389,7 +445,6 @@ struct ma_wr_state { unsigned long r_max; /* range max */ enum maple_type type; /* mas->node type */ unsigned char offset_end; /* The offset where the write ends */ - unsigned char node_end; /* mas->node end */ unsigned long *pivots; /* mas->node->pivots pointer */ unsigned long end_piv; /* The pivot at the offset end */ void __rcu **slots; /* mas->node->slots pointer */ @@ -398,28 +453,16 @@ struct ma_wr_state { }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_lock_nested(mas, subclass) \ + spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) - /* * Special values for ma_state.node. - * MAS_START means we have not searched the tree. - * MAS_ROOT means we have searched the tree and the entry we found lives in - * the root of the tree (ie it has index 0, length 1 and is the only entry in - * the tree). - * MAS_NONE means we have searched the tree and there is no node in the - * tree for this entry. For example, we searched for index 1 in an empty - * tree. Or we have a tree which points to a full leaf node and we - * searched for an entry which is larger than can be contained in that - * leaf node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ -#define MAS_START ((struct maple_enode *)1UL) -#define MAS_ROOT ((struct maple_enode *)5UL) -#define MAS_NONE ((struct maple_enode *)9UL) -#define MAS_PAUSE ((struct maple_enode *)17UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) @@ -428,7 +471,8 @@ struct ma_wr_state { .tree = mt, \ .index = first, \ .last = end, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ @@ -458,8 +502,10 @@ void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); -int mas_preallocate(struct ma_state *mas, gfp_t gfp); -bool mas_is_err(struct ma_state *mas); +int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); @@ -488,19 +534,18 @@ static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; - mas->node = MAS_START; + mas->status = ma_start; + mas->node = NULL; } -/* Checks if a mas has not found anything */ -static inline bool mas_is_none(const struct ma_state *mas) +static inline bool mas_is_active(struct ma_state *mas) { - return mas->node == MAS_NONE; + return mas->status == ma_active; } -/* Checks if a mas has been paused */ -static inline bool mas_is_paused(const struct ma_state *mas) +static inline bool mas_is_err(struct ma_state *mas) { - return mas->node == MAS_PAUSE; + return mas->status == ma_error; } /** @@ -513,9 +558,10 @@ static inline bool mas_is_paused(const struct ma_state *mas) * * Context: Any context. */ -static inline void mas_reset(struct ma_state *mas) +static __always_inline void mas_reset(struct ma_state *mas) { - mas->node = MAS_START; + mas->status = ma_start; + mas->node = NULL; } /** @@ -532,6 +578,150 @@ static inline void mas_reset(struct ma_state *mas) #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +#ifdef CONFIG_DEBUG_MAPLE_TREE +enum mt_dump_format { + mt_dump_dec, + mt_dump_hex, +}; + +extern atomic_t maple_tree_tests_run; +extern atomic_t maple_tree_tests_passed; + +void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); +void mas_dump(const struct ma_state *mas); +void mas_wr_dump(const struct ma_wr_state *wr_mas); +void mt_validate(struct maple_tree *mt); +void mt_cache_shrink(void); +#define MT_BUG_ON(__tree, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_BUG_ON(__mas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_WR_BUG_ON(__wrmas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MT_WARN_ON(__tree, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WARN_ON(__mas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) +#else +#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) +#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) +#define MT_WARN_ON(__tree, __x) WARN_ON(__x) +#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) +#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) +#endif /* CONFIG_DEBUG_MAPLE_TREE */ + +/** + * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the + * current location. + * @mas: Maple Tree operation state. + * @start: New start of range in the Maple Tree. + * @last: New end of range in the Maple Tree. + * + * set the internal maple state values to a sub-range. + * Please use mas_set_range() if you do not know where you are in the tree. + */ +static inline void __mas_set_range(struct ma_state *mas, unsigned long start, + unsigned long last) +{ + /* Ensure the range starts within the current slot */ + MAS_WARN_ON(mas, mas_is_active(mas) && + (mas->index > start || mas->last < start)); + mas->index = start; + mas->last = last; +} + /** * mas_set_range() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. @@ -545,9 +735,8 @@ static inline void mas_reset(struct ma_state *mas) static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { - mas->index = start; - mas->last = last; - mas->node = MAS_START; + mas_reset(mas); + __mas_set_range(mas, start, last); } /** @@ -662,138 +851,14 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry - * @__index: The index to update to track the location in the tree + * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * - * Note: Will not return the zero entry. + * This iterator skips all entries, which resolve to a NULL pointer, + * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) - -#ifdef CONFIG_DEBUG_MAPLE_TREE -enum mt_dump_format { - mt_dump_dec, - mt_dump_hex, -}; - -extern atomic_t maple_tree_tests_run; -extern atomic_t maple_tree_tests_passed; - -void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); -void mas_dump(const struct ma_state *mas); -void mas_wr_dump(const struct ma_wr_state *wr_mas); -void mt_validate(struct maple_tree *mt); -void mt_cache_shrink(void); -#define MT_BUG_ON(__tree, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_BUG_ON(__mas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_WR_BUG_ON(__wrmas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MT_WARN_ON(__tree, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WARN_ON(__mas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) -#else -#define MT_BUG_ON(__tree, __x) BUG_ON(__x) -#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) -#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) -#define MT_WARN_ON(__tree, __x) WARN_ON(__x) -#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) -#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) -#endif /* CONFIG_DEBUG_MAPLE_TREE */ - #endif /*_LINUX_MAPLE_TREE_H */ diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 0f06c2287b52..693eba9869e4 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -25,6 +25,8 @@ #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 +#define MARVELL_PHY_ID_88Q2110 0x002b0980 +#define MARVELL_PHY_ID_88Q2220 0x002b0b20 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 diff --git a/include/linux/math.h b/include/linux/math.h index 2d388650c556..dd4152711de7 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -156,6 +156,25 @@ __STRUCT_FRACT(u32) ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) /** + * abs_diff - return absolute value of the difference between the arguments + * @a: the first argument + * @b: the second argument + * + * @a and @b have to be of the same type. With this restriction we compare + * signed to signed and unsigned to unsigned. The result is the subtraction + * the smaller of the two from the bigger, hence result is always a positive + * value. + * + * Return: an absolute value of the difference between the @a and @b. + */ +#define abs_diff(a, b) ({ \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + (void)(&__a == &__b); \ + __a > __b ? (__a - __b) : (__b - __a); \ +}) + +/** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value * @ep_ro: right open interval endpoint diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087b..34dfcc77f505 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index c1b7008826e5..68f8d2e970d4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -38,6 +38,7 @@ struct mdio_device { /* Bus address of the MDIO device (0-31) */ int addr; int flags; + int reset_state; struct gpio_desc *reset_gpio; struct reset_control *reset_ctrl; unsigned int reset_assert_delay; @@ -372,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l { linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising, lpa & MDIO_AN_T1_ADV_M_B10L); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_100BT1); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1); } /** @@ -408,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) result |= MDIO_AN_T1_ADV_M_B10L; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_100BT1; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_1000BT1; return result; } @@ -420,7 +429,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) * A function that translates value of following registers to the linkmode: * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) - * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61) */ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) { @@ -439,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) } /** + * mii_eee_cap2_mod_linkmode_sup_t() + * @adv: target the linkmode settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21) + */ +static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** + * mii_eee_cap2_mod_linkmode_adv_t() + * @adv: target the linkmode advertisement settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62) + * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63) + * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t. + * For certain, not yet supported, modes however the bits differ. + * Therefore create separate functions already. + */ +static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** * linkmode_to_mii_eee_cap1_t() * @adv: the linkmode advertisement settings * @@ -466,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv) } /** + * linkmode_to_mii_eee_cap2_t() + * @adv: the linkmode advertisement settings + * + * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16 + * "EEE advertisement 2" register (7.62) + */ +static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv)) + result |= MDIO_EEE_2_5GT; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv)) + result |= MDIO_EEE_5GT; + + return result; +} + +/** * mii_10base_t1_adv_mod_linkmode_t() * @adv: linkmode advertisement settings * @val: register value @@ -537,6 +601,8 @@ static inline void mii_c73_mod_linkmode(unsigned long *adv, u16 *lpa) int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); @@ -564,6 +630,30 @@ int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 mask, u16 set); +static inline int __mdiodev_read(struct mdio_device *mdiodev, u32 regnum) +{ + return __mdiobus_read(mdiodev->bus, mdiodev->addr, regnum); +} + +static inline int __mdiodev_write(struct mdio_device *mdiodev, u32 regnum, + u16 val) +{ + return __mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val); +} + +static inline int __mdiodev_modify(struct mdio_device *mdiodev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set); +} + +static inline int __mdiodev_modify_changed(struct mdio_device *mdiodev, + u32 regnum, u16 mask, u16 set) +{ + return __mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, regnum, + mask, set); +} + static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum) { return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index fd6e0620658d..b38a56a13f39 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -31,11 +31,11 @@ typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev); * @rx_work: async work to execute Rx event callback * @rx_cb: Drivers register this callback to get asynchronous ME * Rx buffer pending notifications. - * @notif_work: async work to execute FW notif event callback + * @notif_work: async work to execute FW notify event callback * @notif_cb: Drivers register this callback to get asynchronous ME * FW notification pending notifications. * - * @do_match: wheather device can be matched with a driver + * @do_match: whether the device can be matched with a driver * @is_added: device is already scanned * @priv_data: client private data */ @@ -94,15 +94,23 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, size_t length); +ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag); +ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, + size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); +ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag, unsigned long timeout); int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f71ff9f0ec81..e2082240586d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. + * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are + * not initialized (only for reserved regions). */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -47,6 +49,7 @@ enum memblock_flags { MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ + MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ }; /** @@ -118,13 +121,17 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +bool memblock_validate_numa_coverage(unsigned long threshold_bytes); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); void memblock_free(void *ptr, size_t size); @@ -259,6 +266,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_reserved_noinit(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_RSRV_NOINIT; +} + static inline bool memblock_is_driver_managed(struct memblock_region *m) { return m->flags & MEMBLOCK_DRIVER_MANAGED; @@ -581,9 +593,7 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned long high_limit); #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min - * shift passed via *_hash_shift */ -#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ +#define HASH_ZERO 0x00000002 /* Zero allocated hash table */ /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. @@ -596,13 +606,11 @@ extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif #ifdef CONFIG_MEMTEST -extern phys_addr_t early_memtest_bad_size; /* Size of faulty ram found by memtest */ -extern bool early_memtest_done; /* Was early memtest done? */ -extern void early_memtest(phys_addr_t start, phys_addr_t end); +void early_memtest(phys_addr_t start, phys_addr_t end); +void memtest_report_meminfo(struct seq_file *m); #else -static inline void early_memtest(phys_addr_t start, phys_addr_t end) -{ -} +static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } +static inline void memtest_report_meminfo(struct seq_file *m) { } #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5818af8eca5a..394fd0a887ae 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -14,6 +14,7 @@ #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> +#include <linux/kernel.h> #include <linux/page_counter.h> #include <linux/vmpressure.h> #include <linux/eventfd.h> @@ -21,6 +22,7 @@ #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> +#include <linux/shrinker.h> struct mem_cgroup; struct obj_cgroup; @@ -61,7 +63,6 @@ struct mem_cgroup_reclaim_cookie { #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 -#define MEM_CGROUP_ID_MAX USHRT_MAX struct mem_cgroup_id { int id; @@ -89,17 +90,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; @@ -112,6 +102,9 @@ struct lruvec_stats { /* Aggregated (CPU and subtree) state */ long state[NR_VM_NODE_STAT_ITEMS]; + /* Non-hierarchical (CPU aggregated) state */ + long state_local[NR_VM_NODE_STAT_ITEMS]; + /* Pending child counts during tree propagation */ long state_pending[NR_VM_NODE_STAT_ITEMS]; }; @@ -151,7 +144,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[]; + struct mem_cgroup_threshold entries[] __counted_by(size); }; struct mem_cgroup_thresholds { @@ -227,6 +220,12 @@ struct mem_cgroup { #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) unsigned long zswap_max; + + /* + * Prevent pages from this memcg from being written back from zswap to + * swap, and from being swapped out on zswap store failures. + */ + bool zswap_writeback; #endif unsigned long soft_limit; @@ -284,6 +283,11 @@ struct mem_cgroup { atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; + /* + * Hint of reclaim pressure for socket memroy management. Note + * that this indicator should NOT be used in legacy cgroup mode + * where socket memory is accounted/charged separately. + */ unsigned long socket_pressure; /* Legacy tcp memory accounting */ @@ -292,7 +296,13 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; - struct obj_cgroup __rcu *objcg; + /* + * memcg->objcg is wiped out as a part of the objcg repaprenting + * process. memcg->orig_objcg preserves a pointer (and a reference) + * to the original objcg until the end of live of memcg. + */ + struct obj_cgroup __rcu *objcg; + struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; #endif @@ -321,7 +331,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_GEN_WALKS_MMU /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; #endif @@ -583,7 +593,7 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, /* * There is no reclaim protection applied to a targeted reclaim. * We are special casing this specific case here because - * mem_cgroup_protected calculation is not robust enough to keep + * mem_cgroup_calculate_protection is not robust enough to keep * the protection invariant for calculated effective values for * parallel reclaimers with different reclaim target. This is * especially a problem for tail memcgs (as they have pages on LRU) @@ -655,6 +665,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); + int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -679,6 +691,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } +int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, + long nr_pages); + int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -698,14 +713,16 @@ static inline void mem_cgroup_uncharge(struct folio *folio) __mem_cgroup_uncharge(folio); } -void __mem_cgroup_uncharge_list(struct list_head *page_list); -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +void __mem_cgroup_uncharge_folios(struct folio_batch *folios); +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge_list(page_list); + __mem_cgroup_uncharge_folios(folios); } +void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); +void mem_cgroup_replace_folio(struct folio *old, struct folio *new); void mem_cgroup_migrate(struct folio *old, struct folio *new); /** @@ -762,6 +779,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); +struct mem_cgroup *get_mem_cgroup_from_current(void); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -807,6 +826,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) return !memcg || css_tryget(&memcg->css); } +static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) +{ + return !memcg || css_tryget_online(&memcg->css); +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) @@ -861,8 +885,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find * - * Returns the parent memcg, or NULL if this is the root or the memory - * controller is in legacy no-hierarchy mode. + * Returns the parent memcg, or NULL if this is the root. */ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { @@ -914,7 +937,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1020,14 +1043,12 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, { struct mem_cgroup_per_node *pn; long x = 0; - int cpu; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - for_each_possible_cpu(cpu) - x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); + x = READ_ONCE(pn->lruvec_stats.state_local[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -1035,8 +1056,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return x; } -void mem_cgroup_flush_stats(void); -void mem_cgroup_flush_stats_ratelimited(void); +void mem_cgroup_flush_stats(struct mem_cgroup *memcg); +void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); @@ -1076,15 +1097,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -static inline void count_memcg_page_event(struct page *page, - enum vm_event_item idx) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - count_memcg_events(memcg, idx, 1); -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1149,7 +1161,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, unsigned int nr); +void split_page_memcg(struct page *head, int old_order, int new_order); unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1158,7 +1170,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 -#define MEM_CGROUP_ID_MAX 0 static inline struct mem_cgroup *folio_memcg(struct folio *folio) { @@ -1186,6 +1197,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) +{ + return NULL; +} + static inline bool folio_memcg_kmem(struct folio *folio) { return false; @@ -1246,12 +1262,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } +static inline void mem_cgroup_commit_charge(struct folio *folio, + struct mem_cgroup *memcg) +{ +} + static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } +static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, + gfp_t gfp, long nr_pages) +{ + return 0; +} + static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { @@ -1266,7 +1293,17 @@ static inline void mem_cgroup_uncharge(struct folio *folio) { } -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) +{ +} + +static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, + unsigned int nr_pages) +{ +} + +static inline void mem_cgroup_replace_folio(struct folio *old, + struct folio *new) { } @@ -1307,6 +1344,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_current(void) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1322,6 +1364,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) return true; } +static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) +{ + return true; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } @@ -1455,7 +1502,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } @@ -1521,11 +1568,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return node_page_state(lruvec_pgdat(lruvec), idx); } -static inline void mem_cgroup_flush_stats(void) +static inline void mem_cgroup_flush_stats(struct mem_cgroup *memcg) { } -static inline void mem_cgroup_flush_stats_ratelimited(void) +static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } @@ -1562,11 +1609,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, { } -static inline void count_memcg_page_event(struct page *page, - int idx) -{ -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1577,7 +1619,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, unsigned int nr) +static inline void split_page_memcg(struct page *head, int old_order, int new_order) { } @@ -1651,18 +1693,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, return folio_lruvec_lock_irq(folio); } -/* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, - struct lruvec *locked_lruvec, unsigned long *flags) +/* Don't lock again iff folio's lruvec locked */ +static inline void folio_lruvec_relock_irqsave(struct folio *folio, + struct lruvec **lruvecp, unsigned long *flags) { - if (locked_lruvec) { - if (folio_matches_lruvec(folio, locked_lruvec)) - return locked_lruvec; + if (*lruvecp) { + if (folio_matches_lruvec(folio, *lruvecp)) + return; - unlock_page_lruvec_irqrestore(locked_lruvec, *flags); + unlock_page_lruvec_irqrestore(*lruvecp, *flags); } - return folio_lruvec_lock_irqsave(folio, flags); + *lruvecp = folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -1727,8 +1769,8 @@ void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) - return true; + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; do { if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; @@ -1760,8 +1802,26 @@ bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -struct obj_cgroup *get_obj_cgroup_from_current(void); -struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); +/* + * The returned objcg pointer is safe to use without additional + * protection within a scope. The scope is defined either by + * the current task (similar to the "current" global variable) + * or by set_active_memcg() pair. + * Please, use obj_cgroup_get() to get a reference if the pointer + * needs to be used outside of the local scope. + */ +struct obj_cgroup *current_obj_cgroup(void); +struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio); + +static inline struct obj_cgroup *get_obj_cgroup_from_current(void) +{ + struct obj_cgroup *objcg = current_obj_cgroup(); + + if (objcg) + obj_cgroup_get(objcg); + + return objcg; +} int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); @@ -1845,7 +1905,7 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) +static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) { return NULL; } @@ -1886,6 +1946,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, bool obj_cgroup_may_zswap(struct obj_cgroup *objcg); void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size); void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size); +bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg); #else static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg) { @@ -1899,6 +1960,11 @@ static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) { } +static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) +{ + /* if zswap is disabled, do not block pages going to the swapping device */ + return true; +} #endif #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index fc9647b1b4f9..69e781900082 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -6,6 +6,7 @@ #include <linux/nodemask.h> #include <linux/kref.h> #include <linux/mmzone.h> +#include <linux/notifier.h> /* * Each tier cover a abstrace distance chunk size of 128 */ @@ -22,7 +23,9 @@ struct memory_tier; struct memory_dev_type { /* list of memory types that are part of same tier as this type */ - struct list_head tier_sibiling; + struct list_head tier_sibling; + /* list of memory types that are managed by one driver */ + struct list_head list; /* abstract distance for this specific memory type */ int adistance; /* Nodes of same abstract distance */ @@ -30,12 +33,21 @@ struct memory_dev_type { struct kref kref; }; +struct access_coordinate; + #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; +extern struct memory_dev_type *default_dram_type; struct memory_dev_type *alloc_memory_type(int adistance); -void destroy_memory_type(struct memory_dev_type *memtype); +void put_memory_type(struct memory_dev_type *memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); void clear_node_memory_type(int node, struct memory_dev_type *memtype); +int register_mt_adistance_algorithm(struct notifier_block *nb); +int unregister_mt_adistance_algorithm(struct notifier_block *nb); +int mt_calc_adistance(int node, int *adist); +int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source); +int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -60,6 +72,7 @@ static inline bool node_is_toptier(int node) #else #define numa_demotion_enabled false +#define default_dram_type NULL /* * CONFIG_NUMA implementation returns non NULL error. */ @@ -68,7 +81,7 @@ static inline struct memory_dev_type *alloc_memory_type(int adistance) return NULL; } -static inline void destroy_memory_type(struct memory_dev_type *memtype) +static inline void put_memory_type(struct memory_dev_type *memtype) { } @@ -97,5 +110,31 @@ static inline bool node_is_toptier(int node) { return true; } + +static inline int register_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb) +{ + return 0; +} + +static inline int mt_calc_adistance(int node, int *adist) +{ + return NOTIFY_DONE; +} + +static inline int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source) +{ + return -EIO; +} + +static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) +{ + return -EIO; +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 31343566c221..c0afee5d126e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -77,11 +77,7 @@ struct memory_block { */ struct zone *zone; struct device dev; - /* - * Number of vmemmap pages. These pages - * lay at the beginning of the memory block. - */ - unsigned long nr_vmemmap_pages; + struct vmem_altmap *altmap; struct memory_group *group; /* group (if any) for this block */ struct list_head group_next; /* next block inside memory group */ #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) @@ -100,8 +96,17 @@ int set_memory_block_size_order(unsigned int order); #define MEM_GOING_ONLINE (1<<3) #define MEM_CANCEL_ONLINE (1<<4) #define MEM_CANCEL_OFFLINE (1<<5) +#define MEM_PREPARE_ONLINE (1<<6) +#define MEM_FINISH_OFFLINE (1<<7) struct memory_notify { + /* + * The altmap_start_pfn and altmap_nr_pages fields are designated for + * specifying the altmap range and are exclusively intended for use in + * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers. + */ + unsigned long altmap_start_pfn; + unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; @@ -118,6 +123,7 @@ struct mem_section; #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 #define HMAT_CALLBACK_PRI 2 +#define CXL_CALLBACK_PRI 5 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 @@ -147,7 +153,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 013c69753c91..7a9ff464608d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -97,6 +97,8 @@ typedef int __bitwise mhp_t; * To do so, we will use the beginning of the hot-added range to build * the page tables for the memmap array that describes the entire range. * Only selected architectures support it with SPARSE_VMEMMAP. + * This is only a hint, the core kernel can decide to not do this based on + * different alignment checks. */ #define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1)) /* @@ -104,6 +106,22 @@ typedef int __bitwise mhp_t; * implies the node id (nid). */ #define MHP_NID_IS_MGID ((__force mhp_t)BIT(2)) +/* + * The hotplugged memory is completely inaccessible while the memory is + * offline. The memory provider will handle MEM_PREPARE_ONLINE / + * MEM_FINISH_OFFLINE notifications and make the memory accessible. + * + * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY, + * because the altmap cannot be written (e.g., poisoned) when adding + * memory -- before it is set online. + * + * This allows for adding memory with an altmap that is not currently + * made available by a hypervisor. When onlining that memory, the + * hypervisor can be instructed to make that memory available, and + * the onlining phase will not require any memory allocations, which is + * helpful in low-memory situations. + */ +#define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3)) /* * Extended parameters for memory hotplug: @@ -119,6 +137,7 @@ struct mhp_params { bool mhp_range_allowed(u64 start, u64 size, bool need_mapping); struct range mhp_get_pluggable_range(bool need_mapping); +bool mhp_supports_memmap_on_memory(void); /* * Zone resizing functions @@ -152,7 +171,7 @@ extern void adjust_present_page_count(struct page *page, long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, - struct zone *zone); + struct zone *zone, bool mhp_off_inaccessible); extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); @@ -260,6 +279,11 @@ static inline bool movable_node_is_enabled(void) return false; } +static inline bool mhp_supports_memmap_on_memory(void) +{ + return false; +} + static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} @@ -354,7 +378,6 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid, extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); -extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d232de7cdc56..931b118336f4 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -17,6 +17,8 @@ struct mm_struct; +#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ + #ifdef CONFIG_NUMA /* @@ -89,8 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) return pol; } -#define vma_policy(vma) ((vma)->vm_policy) - static inline void mpol_get(struct mempolicy *pol) { if (pol) @@ -107,35 +107,30 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) /* * Tree of shared policies for a shared memory region. - * Maintain the policies in a pseudo mm that contains vmas. The vmas - * carry the policy. As a special twist the pseudo mm is indexed in pages, not - * bytes, so that we can work with shared memory segments bigger than - * unsigned long. */ - -struct sp_node { - struct rb_node nd; - unsigned long start, end; - struct mempolicy *policy; -}; - struct shared_policy { struct rb_root root; rwlock_t lock; }; +struct sp_node { + struct rb_node nd; + pgoff_t start, end; + struct mempolicy *policy; +}; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -int mpol_set_shared_policy(struct shared_policy *info, - struct vm_area_struct *vma, - struct mempolicy *new); -void mpol_free_shared_policy(struct shared_policy *p); +int mpol_set_shared_policy(struct shared_policy *sp, + struct vm_area_struct *vma, struct mempolicy *mpol); +void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, - unsigned long idx); + pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); @@ -149,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); -extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); - extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -174,7 +167,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) @@ -188,12 +181,17 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); struct mempolicy {}; +static inline struct mempolicy *get_task_policy(struct task_struct *p) +{ + return NULL; +} + static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } -static inline void mpol_put(struct mempolicy *p) +static inline void mpol_put(struct mempolicy *pol) { } @@ -212,17 +210,22 @@ static inline void mpol_shared_policy_init(struct shared_policy *sp, { } -static inline void mpol_free_shared_policy(struct shared_policy *p) +static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * -mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } -#define vma_policy(vma) NULL +static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr, int order, pgoff_t *ilx) +{ + *ilx = 0; + return NULL; +} static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) @@ -278,7 +281,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) } #endif -static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, +static inline int mpol_misplaced(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 4aae6c06c5f2..16c5cc807ff6 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -51,6 +51,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; +extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc; extern void mempool_free(void *element, mempool_t *pool); /* @@ -94,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) (void *) size); } +void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kvfree(void *element, void *pool_data); + +static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + +static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 1314d9c5f05b..3f7143ade32c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -25,6 +25,7 @@ struct vmem_altmap { unsigned long free; unsigned long align; unsigned long alloc; + bool inaccessible; }; /* @@ -108,7 +109,7 @@ struct dev_pagemap_ops { * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref - * @type: memory type: see MEMORY_* in memory_hotplug.h + * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify defailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order. @@ -196,8 +197,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); -unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); -void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, @@ -228,16 +227,6 @@ static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) return false; } -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) -{ - return 0; -} - -static inline void vmem_altmap_free(struct vmem_altmap *altmap, - unsigned long nr_pfns) -{ -} - /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 473545a2c425..6fa21791fc85 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int pm860x_page_reg_read(struct i2c_client *, int); extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char); extern int pm860x_page_bulk_read(struct i2c_client *, int, int, unsigned char *); -extern int pm860x_page_bulk_write(struct i2c_client *, int, int, - unsigned char *); -extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char, - unsigned char); - #endif /* __LINUX_MFD_88PM860X_H */ diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 302a330c5c84..76d326ea8eba 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -382,10 +382,6 @@ struct ab8500_platform_data { struct ab8500_sysctrl_platform_data *sysctrl; }; -extern int ab8500_init(struct ab8500 *ab8500, - enum ab8500_version version); -extern int ab8500_exit(struct ab8500 *ab8500); - extern int ab8500_suspend(struct ab8500 *ab8500); static inline int is_ab8500(struct ab8500 *ab) @@ -503,13 +499,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) void ab8500_override_turn_on_stat(u8 mask, u8 set); -#ifdef CONFIG_AB8500_DEBUG -extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); -void ab8500_dump_all_banks(struct device *dev); -void ab8500_debug_register_interrupt(int line); -#else static inline void ab8500_dump_all_banks(struct device *dev) {} static inline void ab8500_debug_register_interrupt(int line) {} -#endif #endif /* MFD_AB8500_H */ diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 47e7a3a61ce6..e8bcad641d8c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -92,7 +92,7 @@ struct mfd_cell { * (above) when matching OF nodes with devices that have identical * compatible strings */ - const u64 of_reg; + u64 of_reg; /* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */ bool use_of_reg; diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h new file mode 100644 index 000000000000..c39a49269cb7 --- /dev/null +++ b/include/linux/mfd/cs42l43-regs.h @@ -0,0 +1,1184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cs42l43 register definitions + * + * Copyright (c) 2022-2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef CS42L43_CORE_REGS_H +#define CS42L43_CORE_REGS_H + +/* Registers */ +#define CS42L43_GEN_INT_STAT_1 0x000000C0 +#define CS42L43_GEN_INT_MASK_1 0x000000C1 +#define CS42L43_DEVID 0x00003000 +#define CS42L43_REVID 0x00003004 +#define CS42L43_RELID 0x0000300C +#define CS42L43_SFT_RESET 0x00003020 +#define CS42L43_DRV_CTRL1 0x00006004 +#define CS42L43_DRV_CTRL3 0x0000600C +#define CS42L43_DRV_CTRL4 0x00006010 +#define CS42L43_DRV_CTRL_5 0x00006014 +#define CS42L43_GPIO_CTRL1 0x00006034 +#define CS42L43_GPIO_CTRL2 0x00006038 +#define CS42L43_GPIO_STS 0x0000603C +#define CS42L43_GPIO_FN_SEL 0x00006040 +#define CS42L43_MCLK_SRC_SEL 0x00007004 +#define CS42L43_CCM_BLK_CLK_CONTROL 0x00007010 +#define CS42L43_SAMPLE_RATE1 0x00007014 +#define CS42L43_SAMPLE_RATE2 0x00007018 +#define CS42L43_SAMPLE_RATE3 0x0000701C +#define CS42L43_SAMPLE_RATE4 0x00007020 +#define CS42L43_PLL_CONTROL 0x00007034 +#define CS42L43_FS_SELECT1 0x00007038 +#define CS42L43_FS_SELECT2 0x0000703C +#define CS42L43_FS_SELECT3 0x00007040 +#define CS42L43_FS_SELECT4 0x00007044 +#define CS42L43_PDM_CONTROL 0x0000704C +#define CS42L43_ASP_CLK_CONFIG1 0x00007058 +#define CS42L43_ASP_CLK_CONFIG2 0x0000705C +#define CS42L43_OSC_DIV_SEL 0x00007068 +#define CS42L43_ADC_B_CTRL1 0x00008000 +#define CS42L43_ADC_B_CTRL2 0x00008004 +#define CS42L43_DECIM_HPF_WNF_CTRL1 0x0000803C +#define CS42L43_DECIM_HPF_WNF_CTRL2 0x00008040 +#define CS42L43_DECIM_HPF_WNF_CTRL3 0x00008044 +#define CS42L43_DECIM_HPF_WNF_CTRL4 0x00008048 +#define CS42L43_DMIC_PDM_CTRL 0x0000804C +#define CS42L43_DECIM_VOL_CTRL_CH1_CH2 0x00008050 +#define CS42L43_DECIM_VOL_CTRL_CH3_CH4 0x00008054 +#define CS42L43_DECIM_VOL_CTRL_UPDATE 0x00008058 +#define CS42L43_INTP_VOLUME_CTRL1 0x00009008 +#define CS42L43_INTP_VOLUME_CTRL2 0x0000900C +#define CS42L43_AMP1_2_VOL_RAMP 0x00009010 +#define CS42L43_ASP_CTRL 0x0000A000 +#define CS42L43_ASP_FSYNC_CTRL1 0x0000A004 +#define CS42L43_ASP_FSYNC_CTRL2 0x0000A008 +#define CS42L43_ASP_FSYNC_CTRL3 0x0000A00C +#define CS42L43_ASP_FSYNC_CTRL4 0x0000A010 +#define CS42L43_ASP_DATA_CTRL 0x0000A018 +#define CS42L43_ASP_RX_EN 0x0000A020 +#define CS42L43_ASP_TX_EN 0x0000A024 +#define CS42L43_ASP_RX_CH1_CTRL 0x0000A028 +#define CS42L43_ASP_RX_CH2_CTRL 0x0000A02C +#define CS42L43_ASP_RX_CH3_CTRL 0x0000A030 +#define CS42L43_ASP_RX_CH4_CTRL 0x0000A034 +#define CS42L43_ASP_RX_CH5_CTRL 0x0000A038 +#define CS42L43_ASP_RX_CH6_CTRL 0x0000A03C +#define CS42L43_ASP_TX_CH1_CTRL 0x0000A068 +#define CS42L43_ASP_TX_CH2_CTRL 0x0000A06C +#define CS42L43_ASP_TX_CH3_CTRL 0x0000A070 +#define CS42L43_ASP_TX_CH4_CTRL 0x0000A074 +#define CS42L43_ASP_TX_CH5_CTRL 0x0000A078 +#define CS42L43_ASP_TX_CH6_CTRL 0x0000A07C +#define CS42L43_OTP_REVISION_ID 0x0000B02C +#define CS42L43_ASPTX1_INPUT 0x0000C200 +#define CS42L43_ASPTX2_INPUT 0x0000C210 +#define CS42L43_ASPTX3_INPUT 0x0000C220 +#define CS42L43_ASPTX4_INPUT 0x0000C230 +#define CS42L43_ASPTX5_INPUT 0x0000C240 +#define CS42L43_ASPTX6_INPUT 0x0000C250 +#define CS42L43_SWIRE_DP1_CH1_INPUT 0x0000C280 +#define CS42L43_SWIRE_DP1_CH2_INPUT 0x0000C290 +#define CS42L43_SWIRE_DP1_CH3_INPUT 0x0000C2A0 +#define CS42L43_SWIRE_DP1_CH4_INPUT 0x0000C2B0 +#define CS42L43_SWIRE_DP2_CH1_INPUT 0x0000C2C0 +#define CS42L43_SWIRE_DP2_CH2_INPUT 0x0000C2D0 +#define CS42L43_SWIRE_DP3_CH1_INPUT 0x0000C2E0 +#define CS42L43_SWIRE_DP3_CH2_INPUT 0x0000C2F0 +#define CS42L43_SWIRE_DP4_CH1_INPUT 0x0000C300 +#define CS42L43_SWIRE_DP4_CH2_INPUT 0x0000C310 +#define CS42L43_ASRC_INT1_INPUT1 0x0000C400 +#define CS42L43_ASRC_INT2_INPUT1 0x0000C410 +#define CS42L43_ASRC_INT3_INPUT1 0x0000C420 +#define CS42L43_ASRC_INT4_INPUT1 0x0000C430 +#define CS42L43_ASRC_DEC1_INPUT1 0x0000C440 +#define CS42L43_ASRC_DEC2_INPUT1 0x0000C450 +#define CS42L43_ASRC_DEC3_INPUT1 0x0000C460 +#define CS42L43_ASRC_DEC4_INPUT1 0x0000C470 +#define CS42L43_ISRC1INT1_INPUT1 0x0000C500 +#define CS42L43_ISRC1INT2_INPUT1 0x0000C510 +#define CS42L43_ISRC1DEC1_INPUT1 0x0000C520 +#define CS42L43_ISRC1DEC2_INPUT1 0x0000C530 +#define CS42L43_ISRC2INT1_INPUT1 0x0000C540 +#define CS42L43_ISRC2INT2_INPUT1 0x0000C550 +#define CS42L43_ISRC2DEC1_INPUT1 0x0000C560 +#define CS42L43_ISRC2DEC2_INPUT1 0x0000C570 +#define CS42L43_EQ1MIX_INPUT1 0x0000C580 +#define CS42L43_EQ1MIX_INPUT2 0x0000C584 +#define CS42L43_EQ1MIX_INPUT3 0x0000C588 +#define CS42L43_EQ1MIX_INPUT4 0x0000C58C +#define CS42L43_EQ2MIX_INPUT1 0x0000C590 +#define CS42L43_EQ2MIX_INPUT2 0x0000C594 +#define CS42L43_EQ2MIX_INPUT3 0x0000C598 +#define CS42L43_EQ2MIX_INPUT4 0x0000C59C +#define CS42L43_SPDIF1_INPUT1 0x0000C600 +#define CS42L43_SPDIF2_INPUT1 0x0000C610 +#define CS42L43_AMP1MIX_INPUT1 0x0000C620 +#define CS42L43_AMP1MIX_INPUT2 0x0000C624 +#define CS42L43_AMP1MIX_INPUT3 0x0000C628 +#define CS42L43_AMP1MIX_INPUT4 0x0000C62C +#define CS42L43_AMP2MIX_INPUT1 0x0000C630 +#define CS42L43_AMP2MIX_INPUT2 0x0000C634 +#define CS42L43_AMP2MIX_INPUT3 0x0000C638 +#define CS42L43_AMP2MIX_INPUT4 0x0000C63C +#define CS42L43_AMP3MIX_INPUT1 0x0000C640 +#define CS42L43_AMP3MIX_INPUT2 0x0000C644 +#define CS42L43_AMP3MIX_INPUT3 0x0000C648 +#define CS42L43_AMP3MIX_INPUT4 0x0000C64C +#define CS42L43_AMP4MIX_INPUT1 0x0000C650 +#define CS42L43_AMP4MIX_INPUT2 0x0000C654 +#define CS42L43_AMP4MIX_INPUT3 0x0000C658 +#define CS42L43_AMP4MIX_INPUT4 0x0000C65C +#define CS42L43_ASRC_INT_ENABLES 0x0000E000 +#define CS42L43_ASRC_DEC_ENABLES 0x0000E004 +#define CS42L43_PDNCNTL 0x00010000 +#define CS42L43_RINGSENSE_DEB_CTRL 0x0001001C +#define CS42L43_TIPSENSE_DEB_CTRL 0x00010020 +#define CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS 0x00010028 +#define CS42L43_HS2 0x00010040 +#define CS42L43_HS_STAT 0x00010048 +#define CS42L43_MCU_SW_INTERRUPT 0x00010094 +#define CS42L43_STEREO_MIC_CTRL 0x000100A4 +#define CS42L43_STEREO_MIC_CLAMP_CTRL 0x000100C4 +#define CS42L43_BLOCK_EN2 0x00010104 +#define CS42L43_BLOCK_EN3 0x00010108 +#define CS42L43_BLOCK_EN4 0x0001010C +#define CS42L43_BLOCK_EN5 0x00010110 +#define CS42L43_BLOCK_EN6 0x00010114 +#define CS42L43_BLOCK_EN7 0x00010118 +#define CS42L43_BLOCK_EN8 0x0001011C +#define CS42L43_BLOCK_EN9 0x00010120 +#define CS42L43_BLOCK_EN10 0x00010124 +#define CS42L43_BLOCK_EN11 0x00010128 +#define CS42L43_TONE_CH1_CTRL 0x00010134 +#define CS42L43_TONE_CH2_CTRL 0x00010138 +#define CS42L43_MIC_DETECT_CONTROL_1 0x00011074 +#define CS42L43_DETECT_STATUS_1 0x0001107C +#define CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL 0x00011090 +#define CS42L43_MIC_DETECT_CONTROL_ANDROID 0x000110B0 +#define CS42L43_ISRC1_CTRL 0x00012004 +#define CS42L43_ISRC2_CTRL 0x00013004 +#define CS42L43_CTRL_REG 0x00014000 +#define CS42L43_FDIV_FRAC 0x00014004 +#define CS42L43_CAL_RATIO 0x00014008 +#define CS42L43_SPI_CLK_CONFIG1 0x00016004 +#define CS42L43_SPI_CONFIG1 0x00016010 +#define CS42L43_SPI_CONFIG2 0x00016014 +#define CS42L43_SPI_CONFIG3 0x00016018 +#define CS42L43_SPI_CONFIG4 0x00016024 +#define CS42L43_SPI_STATUS1 0x00016100 +#define CS42L43_SPI_STATUS2 0x00016104 +#define CS42L43_TRAN_CONFIG1 0x00016200 +#define CS42L43_TRAN_CONFIG2 0x00016204 +#define CS42L43_TRAN_CONFIG3 0x00016208 +#define CS42L43_TRAN_CONFIG4 0x0001620C +#define CS42L43_TRAN_CONFIG5 0x00016220 +#define CS42L43_TRAN_CONFIG6 0x00016224 +#define CS42L43_TRAN_CONFIG7 0x00016228 +#define CS42L43_TRAN_CONFIG8 0x0001622C +#define CS42L43_TRAN_STATUS1 0x00016300 +#define CS42L43_TRAN_STATUS2 0x00016304 +#define CS42L43_TRAN_STATUS3 0x00016308 +#define CS42L43_TX_DATA 0x00016400 +#define CS42L43_RX_DATA 0x00016600 +#define CS42L43_DACCNFG1 0x00017000 +#define CS42L43_DACCNFG2 0x00017004 +#define CS42L43_HPPATHVOL 0x0001700C +#define CS42L43_PGAVOL 0x00017014 +#define CS42L43_LOADDETRESULTS 0x00017018 +#define CS42L43_LOADDETENA 0x00017024 +#define CS42L43_CTRL 0x00017028 +#define CS42L43_COEFF_DATA_IN0 0x00018000 +#define CS42L43_COEFF_RD_WR0 0x00018008 +#define CS42L43_INIT_DONE0 0x00018010 +#define CS42L43_START_EQZ0 0x00018014 +#define CS42L43_MUTE_EQ_IN0 0x0001801C +#define CS42L43_DECIM_INT 0x0001B000 +#define CS42L43_EQ_INT 0x0001B004 +#define CS42L43_ASP_INT 0x0001B008 +#define CS42L43_PLL_INT 0x0001B00C +#define CS42L43_SOFT_INT 0x0001B010 +#define CS42L43_SWIRE_INT 0x0001B014 +#define CS42L43_MSM_INT 0x0001B018 +#define CS42L43_ACC_DET_INT 0x0001B01C +#define CS42L43_I2C_TGT_INT 0x0001B020 +#define CS42L43_SPI_MSTR_INT 0x0001B024 +#define CS42L43_SW_TO_SPI_BRIDGE_INT 0x0001B028 +#define CS42L43_OTP_INT 0x0001B02C +#define CS42L43_CLASS_D_AMP_INT 0x0001B030 +#define CS42L43_GPIO_INT 0x0001B034 +#define CS42L43_ASRC_INT 0x0001B038 +#define CS42L43_HPOUT_INT 0x0001B03C +#define CS42L43_DECIM_MASK 0x0001B0A0 +#define CS42L43_EQ_MIX_MASK 0x0001B0A4 +#define CS42L43_ASP_MASK 0x0001B0A8 +#define CS42L43_PLL_MASK 0x0001B0AC +#define CS42L43_SOFT_MASK 0x0001B0B0 +#define CS42L43_SWIRE_MASK 0x0001B0B4 +#define CS42L43_MSM_MASK 0x0001B0B8 +#define CS42L43_ACC_DET_MASK 0x0001B0BC +#define CS42L43_I2C_TGT_MASK 0x0001B0C0 +#define CS42L43_SPI_MSTR_MASK 0x0001B0C4 +#define CS42L43_SW_TO_SPI_BRIDGE_MASK 0x0001B0C8 +#define CS42L43_OTP_MASK 0x0001B0CC +#define CS42L43_CLASS_D_AMP_MASK 0x0001B0D0 +#define CS42L43_GPIO_INT_MASK 0x0001B0D4 +#define CS42L43_ASRC_MASK 0x0001B0D8 +#define CS42L43_HPOUT_MASK 0x0001B0DC +#define CS42L43_DECIM_INT_SHADOW 0x0001B300 +#define CS42L43_EQ_MIX_INT_SHADOW 0x0001B304 +#define CS42L43_ASP_INT_SHADOW 0x0001B308 +#define CS42L43_PLL_INT_SHADOW 0x0001B30C +#define CS42L43_SOFT_INT_SHADOW 0x0001B310 +#define CS42L43_SWIRE_INT_SHADOW 0x0001B314 +#define CS42L43_MSM_INT_SHADOW 0x0001B318 +#define CS42L43_ACC_DET_INT_SHADOW 0x0001B31C +#define CS42L43_I2C_TGT_INT_SHADOW 0x0001B320 +#define CS42L43_SPI_MSTR_INT_SHADOW 0x0001B324 +#define CS42L43_SW_TO_SPI_BRIDGE_SHADOW 0x0001B328 +#define CS42L43_OTP_INT_SHADOW 0x0001B32C +#define CS42L43_CLASS_D_AMP_INT_SHADOW 0x0001B330 +#define CS42L43_GPIO_SHADOW 0x0001B334 +#define CS42L43_ASRC_SHADOW 0x0001B338 +#define CS42L43_HP_OUT_SHADOW 0x0001B33C +#define CS42L43_BOOT_CONTROL 0x00101000 +#define CS42L43_BLOCK_EN 0x00101008 +#define CS42L43_SHUTTER_CONTROL 0x0010100C +#define CS42L43_MCU_SW_REV 0x00114000 +#define CS42L43_PATCH_START_ADDR 0x00114004 +#define CS42L43_NEED_CONFIGS 0x0011400C +#define CS42L43_BOOT_STATUS 0x0011401C +#define CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS 0x0011F8F8 +#define CS42L43_FW_MISSION_CTRL_NEED_CONFIGS 0x0011FE00 +#define CS42L43_FW_MISSION_CTRL_HAVE_CONFIGS 0x0011FE04 +#define CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION 0x0011FE0C +#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG 0x0011FE10 +#define CS42L43_MCU_RAM_MAX 0x0011FFFF + +/* CS42L43_DEVID */ +#define CS42L43_DEVID_VAL 0x00042A43 + +/* CS42L43_GEN_INT_STAT_1 */ +#define CS42L43_INT_STAT_GEN1_MASK 0x00000001 +#define CS42L43_INT_STAT_GEN1_SHIFT 0 + +/* CS42L43_SFT_RESET */ +#define CS42L43_SFT_RESET_MASK 0xFF000000 +#define CS42L43_SFT_RESET_SHIFT 24 + +#define CS42L43_SFT_RESET_VAL 0x5A000000 + +/* CS42L43_DRV_CTRL1 */ +#define CS42L43_ASP_DOUT_DRV_MASK 0x00038000 +#define CS42L43_ASP_DOUT_DRV_SHIFT 15 +#define CS42L43_ASP_FSYNC_DRV_MASK 0x00000E00 +#define CS42L43_ASP_FSYNC_DRV_SHIFT 9 +#define CS42L43_ASP_BCLK_DRV_MASK 0x000001C0 +#define CS42L43_ASP_BCLK_DRV_SHIFT 6 + +/* CS42L43_DRV_CTRL3 */ +#define CS42L43_I2C_ADDR_DRV_MASK 0x30000000 +#define CS42L43_I2C_ADDR_DRV_SHIFT 28 +#define CS42L43_I2C_SDA_DRV_MASK 0x0C000000 +#define CS42L43_I2C_SDA_DRV_SHIFT 26 +#define CS42L43_PDMOUT2_CLK_DRV_MASK 0x00E00000 +#define CS42L43_PDMOUT2_CLK_DRV_SHIFT 21 +#define CS42L43_PDMOUT2_DATA_DRV_MASK 0x001C0000 +#define CS42L43_PDMOUT2_DATA_DRV_SHIFT 18 +#define CS42L43_PDMOUT1_CLK_DRV_MASK 0x00038000 +#define CS42L43_PDMOUT1_CLK_DRV_SHIFT 15 +#define CS42L43_PDMOUT1_DATA_DRV_MASK 0x00007000 +#define CS42L43_PDMOUT1_DATA_DRV_SHIFT 12 +#define CS42L43_SPI_MISO_DRV_MASK 0x00000038 +#define CS42L43_SPI_MISO_DRV_SHIFT 3 + +/* CS42L43_DRV_CTRL4 */ +#define CS42L43_GPIO3_DRV_MASK 0x00000E00 +#define CS42L43_GPIO3_DRV_SHIFT 9 +#define CS42L43_GPIO2_DRV_MASK 0x000001C0 +#define CS42L43_GPIO2_DRV_SHIFT 6 +#define CS42L43_GPIO1_DRV_MASK 0x00000038 +#define CS42L43_GPIO1_DRV_SHIFT 3 + +/* CS42L43_DRV_CTRL_5 */ +#define CS42L43_I2C_SCL_DRV_MASK 0x18000000 +#define CS42L43_I2C_SCL_DRV_SHIFT 27 +#define CS42L43_SPI_SCK_DRV_MASK 0x07000000 +#define CS42L43_SPI_SCK_DRV_SHIFT 24 +#define CS42L43_SPI_MOSI_DRV_MASK 0x00E00000 +#define CS42L43_SPI_MOSI_DRV_SHIFT 21 +#define CS42L43_SPI_SSB_DRV_MASK 0x001C0000 +#define CS42L43_SPI_SSB_DRV_SHIFT 18 +#define CS42L43_ASP_DIN_DRV_MASK 0x000001C0 +#define CS42L43_ASP_DIN_DRV_SHIFT 6 + +/* CS42L43_GPIO_CTRL1 */ +#define CS42L43_GPIO3_POL_MASK 0x00040000 +#define CS42L43_GPIO3_POL_SHIFT 18 +#define CS42L43_GPIO2_POL_MASK 0x00020000 +#define CS42L43_GPIO2_POL_SHIFT 17 +#define CS42L43_GPIO1_POL_MASK 0x00010000 +#define CS42L43_GPIO1_POL_SHIFT 16 +#define CS42L43_GPIO3_LVL_MASK 0x00000400 +#define CS42L43_GPIO3_LVL_SHIFT 10 +#define CS42L43_GPIO2_LVL_MASK 0x00000200 +#define CS42L43_GPIO2_LVL_SHIFT 9 +#define CS42L43_GPIO1_LVL_MASK 0x00000100 +#define CS42L43_GPIO1_LVL_SHIFT 8 +#define CS42L43_GPIO3_DIR_MASK 0x00000004 +#define CS42L43_GPIO3_DIR_SHIFT 2 +#define CS42L43_GPIO2_DIR_MASK 0x00000002 +#define CS42L43_GPIO2_DIR_SHIFT 1 +#define CS42L43_GPIO1_DIR_MASK 0x00000001 +#define CS42L43_GPIO1_DIR_SHIFT 0 + +/* CS42L43_GPIO_CTRL2 */ +#define CS42L43_GPIO3_DEGLITCH_BYP_MASK 0x00000004 +#define CS42L43_GPIO3_DEGLITCH_BYP_SHIFT 2 +#define CS42L43_GPIO2_DEGLITCH_BYP_MASK 0x00000002 +#define CS42L43_GPIO2_DEGLITCH_BYP_SHIFT 1 +#define CS42L43_GPIO1_DEGLITCH_BYP_MASK 0x00000001 +#define CS42L43_GPIO1_DEGLITCH_BYP_SHIFT 0 + +/* CS42L43_GPIO_STS */ +#define CS42L43_GPIO3_STS_MASK 0x00000004 +#define CS42L43_GPIO3_STS_SHIFT 2 +#define CS42L43_GPIO2_STS_MASK 0x00000002 +#define CS42L43_GPIO2_STS_SHIFT 1 +#define CS42L43_GPIO1_STS_MASK 0x00000001 +#define CS42L43_GPIO1_STS_SHIFT 0 + +/* CS42L43_GPIO_FN_SEL */ +#define CS42L43_GPIO3_FN_SEL_MASK 0x00000004 +#define CS42L43_GPIO3_FN_SEL_SHIFT 2 +#define CS42L43_GPIO1_FN_SEL_MASK 0x00000001 +#define CS42L43_GPIO1_FN_SEL_SHIFT 0 + +/* CS42L43_MCLK_SRC_SEL */ +#define CS42L43_OSC_PLL_MCLK_SEL_MASK 0x00000001 +#define CS42L43_OSC_PLL_MCLK_SEL_SHIFT 0 + +/* CS42L43_SAMPLE_RATE1..CS42L43_SAMPLE_RATE4 */ +#define CS42L43_SAMPLE_RATE_MASK 0x0000001F +#define CS42L43_SAMPLE_RATE_SHIFT 0 + +/* CS42L43_PLL_CONTROL */ +#define CS42L43_PLL_REFCLK_EN_MASK 0x00000008 +#define CS42L43_PLL_REFCLK_EN_SHIFT 3 +#define CS42L43_PLL_REFCLK_DIV_MASK 0x00000006 +#define CS42L43_PLL_REFCLK_DIV_SHIFT 1 +#define CS42L43_PLL_REFCLK_SRC_MASK 0x00000001 +#define CS42L43_PLL_REFCLK_SRC_SHIFT 0 + +/* CS42L43_FS_SELECT1 */ +#define CS42L43_ASP_RATE_MASK 0x00000003 +#define CS42L43_ASP_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT2 */ +#define CS42L43_ASRC_DEC_OUT_RATE_MASK 0x000000C0 +#define CS42L43_ASRC_DEC_OUT_RATE_SHIFT 6 +#define CS42L43_ASRC_INT_OUT_RATE_MASK 0x00000030 +#define CS42L43_ASRC_INT_OUT_RATE_SHIFT 4 +#define CS42L43_ASRC_DEC_IN_RATE_MASK 0x0000000C +#define CS42L43_ASRC_DEC_IN_RATE_SHIFT 2 +#define CS42L43_ASRC_INT_IN_RATE_MASK 0x00000003 +#define CS42L43_ASRC_INT_IN_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT3 */ +#define CS42L43_HPOUT_RATE_MASK 0x0000C000 +#define CS42L43_HPOUT_RATE_SHIFT 14 +#define CS42L43_EQZ_RATE_MASK 0x00003000 +#define CS42L43_EQZ_RATE_SHIFT 12 +#define CS42L43_DIAGGEN_RATE_MASK 0x00000C00 +#define CS42L43_DIAGGEN_RATE_SHIFT 10 +#define CS42L43_DECIM_CH4_RATE_MASK 0x00000300 +#define CS42L43_DECIM_CH4_RATE_SHIFT 8 +#define CS42L43_DECIM_CH3_RATE_MASK 0x000000C0 +#define CS42L43_DECIM_CH3_RATE_SHIFT 6 +#define CS42L43_DECIM_CH2_RATE_MASK 0x00000030 +#define CS42L43_DECIM_CH2_RATE_SHIFT 4 +#define CS42L43_DECIM_CH1_RATE_MASK 0x0000000C +#define CS42L43_DECIM_CH1_RATE_SHIFT 2 +#define CS42L43_AMP1_2_RATE_MASK 0x00000003 +#define CS42L43_AMP1_2_RATE_SHIFT 0 + +/* CS42L43_FS_SELECT4 */ +#define CS42L43_SW_DP7_RATE_MASK 0x00C00000 +#define CS42L43_SW_DP7_RATE_SHIFT 22 +#define CS42L43_SW_DP6_RATE_MASK 0x00300000 +#define CS42L43_SW_DP6_RATE_SHIFT 20 +#define CS42L43_SPDIF_RATE_MASK 0x000C0000 +#define CS42L43_SPDIF_RATE_SHIFT 18 +#define CS42L43_SW_DP5_RATE_MASK 0x00030000 +#define CS42L43_SW_DP5_RATE_SHIFT 16 +#define CS42L43_SW_DP4_RATE_MASK 0x0000C000 +#define CS42L43_SW_DP4_RATE_SHIFT 14 +#define CS42L43_SW_DP3_RATE_MASK 0x00003000 +#define CS42L43_SW_DP3_RATE_SHIFT 12 +#define CS42L43_SW_DP2_RATE_MASK 0x00000C00 +#define CS42L43_SW_DP2_RATE_SHIFT 10 +#define CS42L43_SW_DP1_RATE_MASK 0x00000300 +#define CS42L43_SW_DP1_RATE_SHIFT 8 +#define CS42L43_ISRC2_LOW_RATE_MASK 0x000000C0 +#define CS42L43_ISRC2_LOW_RATE_SHIFT 6 +#define CS42L43_ISRC2_HIGH_RATE_MASK 0x00000030 +#define CS42L43_ISRC2_HIGH_RATE_SHIFT 4 +#define CS42L43_ISRC1_LOW_RATE_MASK 0x0000000C +#define CS42L43_ISRC1_LOW_RATE_SHIFT 2 +#define CS42L43_ISRC1_HIGH_RATE_MASK 0x00000003 +#define CS42L43_ISRC1_HIGH_RATE_SHIFT 0 + +/* CS42L43_PDM_CONTROL */ +#define CS42L43_PDM2_CLK_DIV_MASK 0x0000000C +#define CS42L43_PDM2_CLK_DIV_SHIFT 2 +#define CS42L43_PDM1_CLK_DIV_MASK 0x00000003 +#define CS42L43_PDM1_CLK_DIV_SHIFT 0 + +/* CS42L43_ASP_CLK_CONFIG1 */ +#define CS42L43_ASP_BCLK_N_MASK 0x03FF0000 +#define CS42L43_ASP_BCLK_N_SHIFT 16 +#define CS42L43_ASP_BCLK_M_MASK 0x000003FF +#define CS42L43_ASP_BCLK_M_SHIFT 0 + +/* CS42L43_ASP_CLK_CONFIG2 */ +#define CS42L43_ASP_MASTER_MODE_MASK 0x00000002 +#define CS42L43_ASP_MASTER_MODE_SHIFT 1 +#define CS42L43_ASP_BCLK_INV_MASK 0x00000001 +#define CS42L43_ASP_BCLK_INV_SHIFT 0 + +/* CS42L43_OSC_DIV_SEL */ +#define CS42L43_OSC_DIV2_EN_MASK 0x00000001 +#define CS42L43_OSC_DIV2_EN_SHIFT 0 + +/* CS42L43_ADC_B_CTRL1..CS42L43_ADC_B_CTRL1 */ +#define CS42L43_PGA_WIDESWING_MODE_EN_MASK 0x00000080 +#define CS42L43_PGA_WIDESWING_MODE_EN_SHIFT 7 +#define CS42L43_ADC_AIN_SEL_MASK 0x00000010 +#define CS42L43_ADC_AIN_SEL_SHIFT 4 +#define CS42L43_ADC_PGA_GAIN_MASK 0x0000000F +#define CS42L43_ADC_PGA_GAIN_SHIFT 0 + +/* CS42L43_DECIM_HPF_WNF_CTRL1..CS42L43_DECIM_HPF_WNF_CTRL4 */ +#define CS42L43_DECIM_WNF_CF_MASK 0x00000070 +#define CS42L43_DECIM_WNF_CF_SHIFT 4 +#define CS42L43_DECIM_WNF_EN_MASK 0x00000008 +#define CS42L43_DECIM_WNF_EN_SHIFT 3 +#define CS42L43_DECIM_HPF_CF_MASK 0x00000006 +#define CS42L43_DECIM_HPF_CF_SHIFT 1 +#define CS42L43_DECIM_HPF_EN_MASK 0x00000001 +#define CS42L43_DECIM_HPF_EN_SHIFT 0 + +/* CS42L43_DMIC_PDM_CTRL */ +#define CS42L43_PDM2R_INV_MASK 0x00000020 +#define CS42L43_PDM2R_INV_SHIFT 5 +#define CS42L43_PDM2L_INV_MASK 0x00000010 +#define CS42L43_PDM2L_INV_SHIFT 4 +#define CS42L43_PDM1R_INV_MASK 0x00000008 +#define CS42L43_PDM1R_INV_SHIFT 3 +#define CS42L43_PDM1L_INV_MASK 0x00000004 +#define CS42L43_PDM1L_INV_SHIFT 2 + +/* CS42L43_DECIM_VOL_CTRL_CH1_CH2 */ +#define CS42L43_DECIM2_MUTE_MASK 0x80000000 +#define CS42L43_DECIM2_MUTE_SHIFT 31 +#define CS42L43_DECIM2_VOL_MASK 0x3FC00000 +#define CS42L43_DECIM2_VOL_SHIFT 22 +#define CS42L43_DECIM2_VD_RAMP_MASK 0x00380000 +#define CS42L43_DECIM2_VD_RAMP_SHIFT 19 +#define CS42L43_DECIM2_VI_RAMP_MASK 0x00070000 +#define CS42L43_DECIM2_VI_RAMP_SHIFT 16 +#define CS42L43_DECIM1_MUTE_MASK 0x00008000 +#define CS42L43_DECIM1_MUTE_SHIFT 15 +#define CS42L43_DECIM1_VOL_MASK 0x00003FC0 +#define CS42L43_DECIM1_VOL_SHIFT 6 +#define CS42L43_DECIM1_VD_RAMP_MASK 0x00000038 +#define CS42L43_DECIM1_VD_RAMP_SHIFT 3 +#define CS42L43_DECIM1_VI_RAMP_MASK 0x00000007 +#define CS42L43_DECIM1_VI_RAMP_SHIFT 0 + +/* CS42L43_DECIM_VOL_CTRL_CH3_CH4 */ +#define CS42L43_DECIM4_MUTE_MASK 0x80000000 +#define CS42L43_DECIM4_MUTE_SHIFT 31 +#define CS42L43_DECIM4_VOL_MASK 0x3FC00000 +#define CS42L43_DECIM4_VOL_SHIFT 22 +#define CS42L43_DECIM4_VD_RAMP_MASK 0x00380000 +#define CS42L43_DECIM4_VD_RAMP_SHIFT 19 +#define CS42L43_DECIM4_VI_RAMP_MASK 0x00070000 +#define CS42L43_DECIM4_VI_RAMP_SHIFT 16 +#define CS42L43_DECIM3_MUTE_MASK 0x00008000 +#define CS42L43_DECIM3_MUTE_SHIFT 15 +#define CS42L43_DECIM3_VOL_MASK 0x00003FC0 +#define CS42L43_DECIM3_VOL_SHIFT 6 +#define CS42L43_DECIM3_VD_RAMP_MASK 0x00000038 +#define CS42L43_DECIM3_VD_RAMP_SHIFT 3 +#define CS42L43_DECIM3_VI_RAMP_MASK 0x00000007 +#define CS42L43_DECIM3_VI_RAMP_SHIFT 0 + +/* CS42L43_DECIM_VOL_CTRL_UPDATE */ +#define CS42L43_DECIM4_VOL_UPDATE_MASK 0x00000008 +#define CS42L43_DECIM4_VOL_UPDATE_SHIFT 3 +#define CS42L43_DECIM3_VOL_UPDATE_MASK 0x00000004 +#define CS42L43_DECIM3_VOL_UPDATE_SHIFT 2 +#define CS42L43_DECIM2_VOL_UPDATE_MASK 0x00000002 +#define CS42L43_DECIM2_VOL_UPDATE_SHIFT 1 +#define CS42L43_DECIM1_VOL_UPDATE_MASK 0x00000001 +#define CS42L43_DECIM1_VOL_UPDATE_SHIFT 0 + +/* CS42L43_INTP_VOLUME_CTRL1..CS42L43_INTP_VOLUME_CTRL2 */ +#define CS42L43_AMP1_2_VU_MASK 0x00000200 +#define CS42L43_AMP1_2_VU_SHIFT 9 +#define CS42L43_AMP_MUTE_MASK 0x00000100 +#define CS42L43_AMP_MUTE_SHIFT 8 +#define CS42L43_AMP_VOL_MASK 0x000000FF +#define CS42L43_AMP_VOL_SHIFT 0 + +/* CS42L43_AMP1_2_VOL_RAMP */ +#define CS42L43_AMP1_2_VD_RAMP_MASK 0x00000070 +#define CS42L43_AMP1_2_VD_RAMP_SHIFT 4 +#define CS42L43_AMP1_2_VI_RAMP_MASK 0x00000007 +#define CS42L43_AMP1_2_VI_RAMP_SHIFT 0 + +/* CS42L43_ASP_CTRL */ +#define CS42L43_ASP_FSYNC_MODE_MASK 0x00000004 +#define CS42L43_ASP_FSYNC_MODE_SHIFT 2 +#define CS42L43_ASP_BCLK_EN_MASK 0x00000002 +#define CS42L43_ASP_BCLK_EN_SHIFT 1 +#define CS42L43_ASP_FSYNC_EN_MASK 0x00000001 +#define CS42L43_ASP_FSYNC_EN_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL1 */ +#define CS42L43_ASP_FSYNC_M_MASK 0x0007FFFF +#define CS42L43_ASP_FSYNC_M_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL3 */ +#define CS42L43_ASP_FSYNC_IN_INV_MASK 0x00000002 +#define CS42L43_ASP_FSYNC_IN_INV_SHIFT 1 +#define CS42L43_ASP_FSYNC_OUT_INV_MASK 0x00000001 +#define CS42L43_ASP_FSYNC_OUT_INV_SHIFT 0 + +/* CS42L43_ASP_FSYNC_CTRL4 */ +#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_MASK 0x00001FFE +#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_SHIFT 1 + +/* CS42L43_ASP_DATA_CTRL */ +#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_MASK 0x00000008 +#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_SHIFT 3 +#define CS42L43_ASP_FSYNC_FRAME_START_DLY_MASK 0x00000007 +#define CS42L43_ASP_FSYNC_FRAME_START_DLY_SHIFT 0 + +/* CS42L43_ASP_RX_EN */ +#define CS42L43_ASP_RX_CH6_EN_MASK 0x00000020 +#define CS42L43_ASP_RX_CH6_EN_SHIFT 5 +#define CS42L43_ASP_RX_CH5_EN_MASK 0x00000010 +#define CS42L43_ASP_RX_CH5_EN_SHIFT 4 +#define CS42L43_ASP_RX_CH4_EN_MASK 0x00000008 +#define CS42L43_ASP_RX_CH4_EN_SHIFT 3 +#define CS42L43_ASP_RX_CH3_EN_MASK 0x00000004 +#define CS42L43_ASP_RX_CH3_EN_SHIFT 2 +#define CS42L43_ASP_RX_CH2_EN_MASK 0x00000002 +#define CS42L43_ASP_RX_CH2_EN_SHIFT 1 +#define CS42L43_ASP_RX_CH1_EN_MASK 0x00000001 +#define CS42L43_ASP_RX_CH1_EN_SHIFT 0 + +/* CS42L43_ASP_TX_EN */ +#define CS42L43_ASP_TX_CH6_EN_MASK 0x00000020 +#define CS42L43_ASP_TX_CH6_EN_SHIFT 5 +#define CS42L43_ASP_TX_CH5_EN_MASK 0x00000010 +#define CS42L43_ASP_TX_CH5_EN_SHIFT 4 +#define CS42L43_ASP_TX_CH4_EN_MASK 0x00000008 +#define CS42L43_ASP_TX_CH4_EN_SHIFT 3 +#define CS42L43_ASP_TX_CH3_EN_MASK 0x00000004 +#define CS42L43_ASP_TX_CH3_EN_SHIFT 2 +#define CS42L43_ASP_TX_CH2_EN_MASK 0x00000002 +#define CS42L43_ASP_TX_CH2_EN_SHIFT 1 +#define CS42L43_ASP_TX_CH1_EN_MASK 0x00000001 +#define CS42L43_ASP_TX_CH1_EN_SHIFT 0 + +/* CS42L43_ASP_RX_CH1_CTRL..CS42L43_ASP_TX_CH6_CTRL */ +#define CS42L43_ASP_CH_WIDTH_MASK 0x001F0000 +#define CS42L43_ASP_CH_WIDTH_SHIFT 16 +#define CS42L43_ASP_CH_SLOT_MASK 0x00001FFE +#define CS42L43_ASP_CH_SLOT_SHIFT 1 +#define CS42L43_ASP_CH_SLOT_PHASE_MASK 0x00000001 +#define CS42L43_ASP_CH_SLOT_PHASE_SHIFT 0 + +/* CS42L43_ASPTX1_INPUT..CS42L43_AMP4MIX_INPUT4 */ +#define CS42L43_MIXER_VOL_MASK 0x00FE0000 +#define CS42L43_MIXER_VOL_SHIFT 17 +#define CS42L43_MIXER_SRC_MASK 0x000001FF +#define CS42L43_MIXER_SRC_SHIFT 0 + +/* CS42L43_ASRC_INT_ENABLES */ +#define CS42L43_ASRC_INT4_EN_MASK 0x00000008 +#define CS42L43_ASRC_INT4_EN_SHIFT 3 +#define CS42L43_ASRC_INT3_EN_MASK 0x00000004 +#define CS42L43_ASRC_INT3_EN_SHIFT 2 +#define CS42L43_ASRC_INT2_EN_MASK 0x00000002 +#define CS42L43_ASRC_INT2_EN_SHIFT 1 +#define CS42L43_ASRC_INT1_EN_MASK 0x00000001 +#define CS42L43_ASRC_INT1_EN_SHIFT 0 + +/* CS42L43_ASRC_DEC_ENABLES */ +#define CS42L43_ASRC_DEC4_EN_MASK 0x00000008 +#define CS42L43_ASRC_DEC4_EN_SHIFT 3 +#define CS42L43_ASRC_DEC3_EN_MASK 0x00000004 +#define CS42L43_ASRC_DEC3_EN_SHIFT 2 +#define CS42L43_ASRC_DEC2_EN_MASK 0x00000002 +#define CS42L43_ASRC_DEC2_EN_SHIFT 1 +#define CS42L43_ASRC_DEC1_EN_MASK 0x00000001 +#define CS42L43_ASRC_DEC1_EN_SHIFT 0 + +/* CS42L43_PDNCNTL */ +#define CS42L43_RING_SENSE_EN_MASK 0x00000002 +#define CS42L43_RING_SENSE_EN_SHIFT 1 + +/* CS42L43_RINGSENSE_DEB_CTRL */ +#define CS42L43_RINGSENSE_INV_MASK 0x00000080 +#define CS42L43_RINGSENSE_INV_SHIFT 7 +#define CS42L43_RINGSENSE_PULLUP_PDNB_MASK 0x00000040 +#define CS42L43_RINGSENSE_PULLUP_PDNB_SHIFT 6 +#define CS42L43_RINGSENSE_FALLING_DB_TIME_MASK 0x00000038 +#define CS42L43_RINGSENSE_FALLING_DB_TIME_SHIFT 3 +#define CS42L43_RINGSENSE_RISING_DB_TIME_MASK 0x00000007 +#define CS42L43_RINGSENSE_RISING_DB_TIME_SHIFT 0 + +/* CS42L43_TIPSENSE_DEB_CTRL */ +#define CS42L43_TIPSENSE_INV_MASK 0x00000080 +#define CS42L43_TIPSENSE_INV_SHIFT 7 +#define CS42L43_TIPSENSE_FALLING_DB_TIME_MASK 0x00000038 +#define CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT 3 +#define CS42L43_TIPSENSE_RISING_DB_TIME_MASK 0x00000007 +#define CS42L43_TIPSENSE_RISING_DB_TIME_SHIFT 0 + +/* CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS */ +#define CS42L43_TIPSENSE_UNPLUG_DB_STS_MASK 0x00000008 +#define CS42L43_TIPSENSE_UNPLUG_DB_STS_SHIFT 3 +#define CS42L43_TIPSENSE_PLUG_DB_STS_MASK 0x00000004 +#define CS42L43_TIPSENSE_PLUG_DB_STS_SHIFT 2 +#define CS42L43_RINGSENSE_UNPLUG_DB_STS_MASK 0x00000002 +#define CS42L43_RINGSENSE_UNPLUG_DB_STS_SHIFT 1 +#define CS42L43_RINGSENSE_PLUG_DB_STS_MASK 0x00000001 +#define CS42L43_RINGSENSE_PLUG_DB_STS_SHIFT 0 + +/* CS42L43_HS2 */ +#define CS42L43_HS_CLAMP_DISABLE_MASK 0x10000000 +#define CS42L43_HS_CLAMP_DISABLE_SHIFT 28 +#define CS42L43_HSBIAS_RAMP_MASK 0x0C000000 +#define CS42L43_HSBIAS_RAMP_SHIFT 26 +#define CS42L43_HSDET_MODE_MASK 0x00018000 +#define CS42L43_HSDET_MODE_SHIFT 15 +#define CS42L43_HSDET_MANUAL_MODE_MASK 0x00006000 +#define CS42L43_HSDET_MANUAL_MODE_SHIFT 13 +#define CS42L43_AUTO_HSDET_TIME_MASK 0x00000700 +#define CS42L43_AUTO_HSDET_TIME_SHIFT 8 +#define CS42L43_AMP3_4_GNDREF_HS3_SEL_MASK 0x00000080 +#define CS42L43_AMP3_4_GNDREF_HS3_SEL_SHIFT 7 +#define CS42L43_AMP3_4_GNDREF_HS4_SEL_MASK 0x00000040 +#define CS42L43_AMP3_4_GNDREF_HS4_SEL_SHIFT 6 +#define CS42L43_HSBIAS_GNDREF_HS3_SEL_MASK 0x00000020 +#define CS42L43_HSBIAS_GNDREF_HS3_SEL_SHIFT 5 +#define CS42L43_HSBIAS_GNDREF_HS4_SEL_MASK 0x00000010 +#define CS42L43_HSBIAS_GNDREF_HS4_SEL_SHIFT 4 +#define CS42L43_HSBIAS_OUT_HS3_SEL_MASK 0x00000008 +#define CS42L43_HSBIAS_OUT_HS3_SEL_SHIFT 3 +#define CS42L43_HSBIAS_OUT_HS4_SEL_MASK 0x00000004 +#define CS42L43_HSBIAS_OUT_HS4_SEL_SHIFT 2 +#define CS42L43_HSGND_HS3_SEL_MASK 0x00000002 +#define CS42L43_HSGND_HS3_SEL_SHIFT 1 +#define CS42L43_HSGND_HS4_SEL_MASK 0x00000001 +#define CS42L43_HSGND_HS4_SEL_SHIFT 0 + +/* CS42L43_HS_STAT */ +#define CS42L43_HSDET_TYPE_STS_MASK 0x00000007 +#define CS42L43_HSDET_TYPE_STS_SHIFT 0 + +/* CS42L43_MCU_SW_INTERRUPT */ +#define CS42L43_CONTROL_IND_MASK 0x00000004 +#define CS42L43_CONTROL_IND_SHIFT 2 +#define CS42L43_CONFIGS_IND_MASK 0x00000002 +#define CS42L43_CONFIGS_IND_SHIFT 1 +#define CS42L43_PATCH_IND_MASK 0x00000001 +#define CS42L43_PATCH_IND_SHIFT 0 + +/* CS42L43_STEREO_MIC_CTRL */ +#define CS42L43_HS2_BIAS_SENSE_EN_MASK 0x00000020 +#define CS42L43_HS2_BIAS_SENSE_EN_SHIFT 5 +#define CS42L43_HS1_BIAS_SENSE_EN_MASK 0x00000010 +#define CS42L43_HS1_BIAS_SENSE_EN_SHIFT 4 +#define CS42L43_HS2_BIAS_EN_MASK 0x00000008 +#define CS42L43_HS2_BIAS_EN_SHIFT 3 +#define CS42L43_HS1_BIAS_EN_MASK 0x00000004 +#define CS42L43_HS1_BIAS_EN_SHIFT 2 +#define CS42L43_JACK_STEREO_CONFIG_MASK 0x00000003 +#define CS42L43_JACK_STEREO_CONFIG_SHIFT 0 + +/* CS42L43_STEREO_MIC_CLAMP_CTRL */ +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK 0x00000002 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_SHIFT 1 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK 0x00000001 +#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_SHIFT 0 + +/* CS42L43_BLOCK_EN2 */ +#define CS42L43_SPI_MSTR_EN_MASK 0x00000001 +#define CS42L43_SPI_MSTR_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN3 */ +#define CS42L43_PDM2_DIN_R_EN_MASK 0x00000020 +#define CS42L43_PDM2_DIN_R_EN_SHIFT 5 +#define CS42L43_PDM2_DIN_L_EN_MASK 0x00000010 +#define CS42L43_PDM2_DIN_L_EN_SHIFT 4 +#define CS42L43_PDM1_DIN_R_EN_MASK 0x00000008 +#define CS42L43_PDM1_DIN_R_EN_SHIFT 3 +#define CS42L43_PDM1_DIN_L_EN_MASK 0x00000004 +#define CS42L43_PDM1_DIN_L_EN_SHIFT 2 +#define CS42L43_ADC2_EN_MASK 0x00000002 +#define CS42L43_ADC2_EN_SHIFT 1 +#define CS42L43_ADC1_EN_MASK 0x00000001 +#define CS42L43_ADC1_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN4 */ +#define CS42L43_ASRC_DEC_BANK_EN_MASK 0x00000002 +#define CS42L43_ASRC_DEC_BANK_EN_SHIFT 1 +#define CS42L43_ASRC_INT_BANK_EN_MASK 0x00000001 +#define CS42L43_ASRC_INT_BANK_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN5 */ +#define CS42L43_ISRC2_BANK_EN_MASK 0x00000002 +#define CS42L43_ISRC2_BANK_EN_SHIFT 1 +#define CS42L43_ISRC1_BANK_EN_MASK 0x00000001 +#define CS42L43_ISRC1_BANK_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN6 */ +#define CS42L43_MIXER_EN_MASK 0x00000001 +#define CS42L43_MIXER_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN7 */ +#define CS42L43_EQ_EN_MASK 0x00000001 +#define CS42L43_EQ_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN8 */ +#define CS42L43_HP_EN_MASK 0x00000001 +#define CS42L43_HP_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN9 */ +#define CS42L43_TONE_EN_MASK 0x00000001 +#define CS42L43_TONE_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN10 */ +#define CS42L43_AMP2_EN_MASK 0x00000002 +#define CS42L43_AMP2_EN_SHIFT 1 +#define CS42L43_AMP1_EN_MASK 0x00000001 +#define CS42L43_AMP1_EN_SHIFT 0 + +/* CS42L43_BLOCK_EN11 */ +#define CS42L43_SPDIF_EN_MASK 0x00000001 +#define CS42L43_SPDIF_EN_SHIFT 0 + +/* CS42L43_TONE_CH1_CTRL..CS42L43_TONE_CH2_CTRL */ +#define CS42L43_TONE_FREQ_MASK 0x00000070 +#define CS42L43_TONE_FREQ_SHIFT 4 +#define CS42L43_TONE_SEL_MASK 0x0000000F +#define CS42L43_TONE_SEL_SHIFT 0 + +/* CS42L43_MIC_DETECT_CONTROL_1 */ +#define CS42L43_BUTTON_DETECT_MODE_MASK 0x00000018 +#define CS42L43_BUTTON_DETECT_MODE_SHIFT 3 +#define CS42L43_HSBIAS_MODE_MASK 0x00000006 +#define CS42L43_HSBIAS_MODE_SHIFT 1 +#define CS42L43_MIC_LVL_DET_DISABLE_MASK 0x00000001 +#define CS42L43_MIC_LVL_DET_DISABLE_SHIFT 0 + +/* CS42L43_DETECT_STATUS_1 */ +#define CS42L43_HSDET_DC_STS_MASK 0x01FF0000 +#define CS42L43_HSDET_DC_STS_SHIFT 16 +#define CS42L43_JACKDET_STS_MASK 0x00000080 +#define CS42L43_JACKDET_STS_SHIFT 7 +#define CS42L43_HSBIAS_CLAMP_STS_MASK 0x00000040 +#define CS42L43_HSBIAS_CLAMP_STS_SHIFT 6 + +/* CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL */ +#define CS42L43_JACKDET_MODE_MASK 0xC0000000 +#define CS42L43_JACKDET_MODE_SHIFT 30 +#define CS42L43_JACKDET_INV_MASK 0x20000000 +#define CS42L43_JACKDET_INV_SHIFT 29 +#define CS42L43_JACKDET_DB_TIME_MASK 0x03000000 +#define CS42L43_JACKDET_DB_TIME_SHIFT 24 +#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_MASK 0x00800000 +#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_SHIFT 23 +#define CS42L43_HSBIAS_SENSE_EN_MASK 0x00000080 +#define CS42L43_HSBIAS_SENSE_EN_SHIFT 7 +#define CS42L43_AUTO_HSBIAS_CLAMP_EN_MASK 0x00000040 +#define CS42L43_AUTO_HSBIAS_CLAMP_EN_SHIFT 6 +#define CS42L43_JACKDET_SENSE_EN_MASK 0x00000020 +#define CS42L43_JACKDET_SENSE_EN_SHIFT 5 +#define CS42L43_HSBIAS_SENSE_TRIP_MASK 0x00000007 +#define CS42L43_HSBIAS_SENSE_TRIP_SHIFT 0 + +/* CS42L43_MIC_DETECT_CONTROL_ANDROID */ +#define CS42L43_HSDET_LVL_COMBWIDTH_MASK 0xC0000000 +#define CS42L43_HSDET_LVL_COMBWIDTH_SHIFT 30 +#define CS42L43_HSDET_LVL2_THRESH_MASK 0x01FF0000 +#define CS42L43_HSDET_LVL2_THRESH_SHIFT 16 +#define CS42L43_HSDET_LVL1_THRESH_MASK 0x000001FF +#define CS42L43_HSDET_LVL1_THRESH_SHIFT 0 + +/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */ +#define CS42L43_ISRC_INT2_EN_MASK 0x00000200 +#define CS42L43_ISRC_INT2_EN_SHIFT 9 +#define CS42L43_ISRC_INT1_EN_MASK 0x00000100 +#define CS42L43_ISRC_INT1_EN_SHIFT 8 +#define CS42L43_ISRC_DEC2_EN_MASK 0x00000002 +#define CS42L43_ISRC_DEC2_EN_SHIFT 1 +#define CS42L43_ISRC_DEC1_EN_MASK 0x00000001 +#define CS42L43_ISRC_DEC1_EN_SHIFT 0 + +/* CS42L43_CTRL_REG */ +#define CS42L43_PLL_MODE_BYPASS_500_MASK 0x00000004 +#define CS42L43_PLL_MODE_BYPASS_500_SHIFT 2 +#define CS42L43_PLL_MODE_BYPASS_1029_MASK 0x00000002 +#define CS42L43_PLL_MODE_BYPASS_1029_SHIFT 1 +#define CS42L43_PLL_EN_MASK 0x00000001 +#define CS42L43_PLL_EN_SHIFT 0 + +/* CS42L43_FDIV_FRAC */ +#define CS42L43_PLL_DIV_INT_MASK 0xFF000000 +#define CS42L43_PLL_DIV_INT_SHIFT 24 +#define CS42L43_PLL_DIV_FRAC_BYTE2_MASK 0x00FF0000 +#define CS42L43_PLL_DIV_FRAC_BYTE2_SHIFT 16 +#define CS42L43_PLL_DIV_FRAC_BYTE1_MASK 0x0000FF00 +#define CS42L43_PLL_DIV_FRAC_BYTE1_SHIFT 8 +#define CS42L43_PLL_DIV_FRAC_BYTE0_MASK 0x000000FF +#define CS42L43_PLL_DIV_FRAC_BYTE0_SHIFT 0 + +/* CS42L43_CAL_RATIO */ +#define CS42L43_PLL_CAL_RATIO_MASK 0x000000FF +#define CS42L43_PLL_CAL_RATIO_SHIFT 0 + +/* CS42L43_SPI_CLK_CONFIG1 */ +#define CS42L43_SCLK_DIV_MASK 0x0000000F +#define CS42L43_SCLK_DIV_SHIFT 0 + +/* CS42L43_SPI_CONFIG1 */ +#define CS42L43_SPI_SS_IDLE_DUR_MASK 0x0F000000 +#define CS42L43_SPI_SS_IDLE_DUR_SHIFT 24 +#define CS42L43_SPI_SS_DELAY_DUR_MASK 0x000F0000 +#define CS42L43_SPI_SS_DELAY_DUR_SHIFT 16 +#define CS42L43_SPI_THREE_WIRE_MASK 0x00000100 +#define CS42L43_SPI_THREE_WIRE_SHIFT 8 +#define CS42L43_SPI_DPHA_MASK 0x00000040 +#define CS42L43_SPI_DPHA_SHIFT 6 +#define CS42L43_SPI_CPHA_MASK 0x00000020 +#define CS42L43_SPI_CPHA_SHIFT 5 +#define CS42L43_SPI_CPOL_MASK 0x00000010 +#define CS42L43_SPI_CPOL_SHIFT 4 +#define CS42L43_SPI_SS_SEL_MASK 0x00000007 +#define CS42L43_SPI_SS_SEL_SHIFT 0 + +/* CS42L43_SPI_CONFIG2 */ +#define CS42L43_SPI_SS_FRC_MASK 0x00000001 +#define CS42L43_SPI_SS_FRC_SHIFT 0 + +/* CS42L43_SPI_CONFIG3 */ +#define CS42L43_SPI_WDT_ENA_MASK 0x00000001 +#define CS42L43_SPI_WDT_ENA_SHIFT 0 + +/* CS42L43_SPI_CONFIG4 */ +#define CS42L43_SPI_STALL_ENA_MASK 0x00010000 +#define CS42L43_SPI_STALL_ENA_SHIFT 16 + +/* CS42L43_SPI_STATUS1 */ +#define CS42L43_SPI_ABORT_STS_MASK 0x00000002 +#define CS42L43_SPI_ABORT_STS_SHIFT 1 +#define CS42L43_SPI_DONE_STS_MASK 0x00000001 +#define CS42L43_SPI_DONE_STS_SHIFT 0 + +/* CS42L43_SPI_STATUS2 */ +#define CS42L43_SPI_RX_DONE_STS_MASK 0x00000010 +#define CS42L43_SPI_RX_DONE_STS_SHIFT 4 +#define CS42L43_SPI_TX_DONE_STS_MASK 0x00000001 +#define CS42L43_SPI_TX_DONE_STS_SHIFT 0 + +/* CS42L43_TRAN_CONFIG1 */ +#define CS42L43_SPI_START_MASK 0x00000001 +#define CS42L43_SPI_START_SHIFT 0 + +/* CS42L43_TRAN_CONFIG2 */ +#define CS42L43_SPI_ABORT_MASK 0x00000001 +#define CS42L43_SPI_ABORT_SHIFT 0 + +/* CS42L43_TRAN_CONFIG3 */ +#define CS42L43_SPI_WORD_SIZE_MASK 0x00070000 +#define CS42L43_SPI_WORD_SIZE_SHIFT 16 +#define CS42L43_SPI_CMD_MASK 0x00000003 +#define CS42L43_SPI_CMD_SHIFT 0 + +/* CS42L43_TRAN_CONFIG4 */ +#define CS42L43_SPI_TX_LENGTH_MASK 0x0000FFFF +#define CS42L43_SPI_TX_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG5 */ +#define CS42L43_SPI_RX_LENGTH_MASK 0x0000FFFF +#define CS42L43_SPI_RX_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG6 */ +#define CS42L43_SPI_TX_BLOCK_LENGTH_MASK 0x0000000F +#define CS42L43_SPI_TX_BLOCK_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG7 */ +#define CS42L43_SPI_RX_BLOCK_LENGTH_MASK 0x0000000F +#define CS42L43_SPI_RX_BLOCK_LENGTH_SHIFT 0 + +/* CS42L43_TRAN_CONFIG8 */ +#define CS42L43_SPI_RX_DONE_MASK 0x00000010 +#define CS42L43_SPI_RX_DONE_SHIFT 4 +#define CS42L43_SPI_TX_DONE_MASK 0x00000001 +#define CS42L43_SPI_TX_DONE_SHIFT 0 + +/* CS42L43_TRAN_STATUS1 */ +#define CS42L43_SPI_BUSY_STS_MASK 0x00000100 +#define CS42L43_SPI_BUSY_STS_SHIFT 8 +#define CS42L43_SPI_RX_REQUEST_MASK 0x00000010 +#define CS42L43_SPI_RX_REQUEST_SHIFT 4 +#define CS42L43_SPI_TX_REQUEST_MASK 0x00000001 +#define CS42L43_SPI_TX_REQUEST_SHIFT 0 + +/* CS42L43_TRAN_STATUS2 */ +#define CS42L43_SPI_TX_BYTE_COUNT_MASK 0x0000FFFF +#define CS42L43_SPI_TX_BYTE_COUNT_SHIFT 0 + +/* CS42L43_TRAN_STATUS3 */ +#define CS42L43_SPI_RX_BYTE_COUNT_MASK 0x0000FFFF +#define CS42L43_SPI_RX_BYTE_COUNT_SHIFT 0 + +/* CS42L43_TX_DATA */ +#define CS42L43_SPI_TX_DATA_MASK 0xFFFFFFFF +#define CS42L43_SPI_TX_DATA_SHIFT 0 + +/* CS42L43_RX_DATA */ +#define CS42L43_SPI_RX_DATA_MASK 0xFFFFFFFF +#define CS42L43_SPI_RX_DATA_SHIFT 0 + +/* CS42L43_DACCNFG1 */ +#define CS42L43_HP_MSTR_VOL_CTRL_EN_MASK 0x00000008 +#define CS42L43_HP_MSTR_VOL_CTRL_EN_SHIFT 3 +#define CS42L43_AMP4_INV_MASK 0x00000002 +#define CS42L43_AMP4_INV_SHIFT 1 +#define CS42L43_AMP3_INV_MASK 0x00000001 +#define CS42L43_AMP3_INV_SHIFT 0 + +/* CS42L43_DACCNFG2 */ +#define CS42L43_HP_AUTO_CLAMP_DISABLE_MASK 0x00000002 +#define CS42L43_HP_AUTO_CLAMP_DISABLE_SHIFT 1 +#define CS42L43_HP_HPF_EN_MASK 0x00000001 +#define CS42L43_HP_HPF_EN_SHIFT 0 + +/* CS42L43_HPPATHVOL */ +#define CS42L43_AMP4_PATH_VOL_MASK 0x01FF0000 +#define CS42L43_AMP4_PATH_VOL_SHIFT 16 +#define CS42L43_AMP3_PATH_VOL_MASK 0x000001FF +#define CS42L43_AMP3_PATH_VOL_SHIFT 0 + +/* CS42L43_PGAVOL */ +#define CS42L43_HP_PATH_VOL_RAMP_MASK 0x0003C000 +#define CS42L43_HP_PATH_VOL_RAMP_SHIFT 14 +#define CS42L43_HP_PATH_VOL_ZC_MASK 0x00002000 +#define CS42L43_HP_PATH_VOL_ZC_SHIFT 13 +#define CS42L43_HP_PATH_VOL_SFT_MASK 0x00001000 +#define CS42L43_HP_PATH_VOL_SFT_SHIFT 12 +#define CS42L43_HP_DIG_VOL_RAMP_MASK 0x00000F00 +#define CS42L43_HP_DIG_VOL_RAMP_SHIFT 8 +#define CS42L43_HP_ANA_VOL_RAMP_MASK 0x0000000F +#define CS42L43_HP_ANA_VOL_RAMP_SHIFT 0 + +/* CS42L43_LOADDETRESULTS */ +#define CS42L43_AMP3_RES_DET_MASK 0x00000003 +#define CS42L43_AMP3_RES_DET_SHIFT 0 + +/* CS42L43_LOADDETENA */ +#define CS42L43_HPLOAD_DET_EN_MASK 0x00000001 +#define CS42L43_HPLOAD_DET_EN_SHIFT 0 + +/* CS42L43_CTRL */ +#define CS42L43_ADPTPWR_MODE_MASK 0x00000007 +#define CS42L43_ADPTPWR_MODE_SHIFT 0 + +/* CS42L43_COEFF_RD_WR0 */ +#define CS42L43_WRITE_MODE_MASK 0x00000002 +#define CS42L43_WRITE_MODE_SHIFT 1 + +/* CS42L43_INIT_DONE0 */ +#define CS42L43_INITIALIZE_DONE_MASK 0x00000001 +#define CS42L43_INITIALIZE_DONE_SHIFT 0 + +/* CS42L43_START_EQZ0 */ +#define CS42L43_START_FILTER_MASK 0x00000001 +#define CS42L43_START_FILTER_SHIFT 0 + +/* CS42L43_MUTE_EQ_IN0 */ +#define CS42L43_MUTE_EQ_CH2_MASK 0x00000002 +#define CS42L43_MUTE_EQ_CH2_SHIFT 1 +#define CS42L43_MUTE_EQ_CH1_MASK 0x00000001 +#define CS42L43_MUTE_EQ_CH1_SHIFT 0 + +/* CS42L43_PLL_INT */ +#define CS42L43_PLL_LOST_LOCK_INT_MASK 0x00000002 +#define CS42L43_PLL_LOST_LOCK_INT_SHIFT 1 +#define CS42L43_PLL_READY_INT_MASK 0x00000001 +#define CS42L43_PLL_READY_INT_SHIFT 0 + +/* CS42L43_SOFT_INT */ +#define CS42L43_CONTROL_APPLIED_INT_MASK 0x00000010 +#define CS42L43_CONTROL_APPLIED_INT_SHIFT 4 +#define CS42L43_CONTROL_WARN_INT_MASK 0x00000008 +#define CS42L43_CONTROL_WARN_INT_SHIFT 3 +#define CS42L43_PATCH_WARN_INT_MASK 0x00000002 +#define CS42L43_PATCH_WARN_INT_SHIFT 1 +#define CS42L43_PATCH_APPLIED_INT_MASK 0x00000001 +#define CS42L43_PATCH_APPLIED_INT_SHIFT 0 + +/* CS42L43_MSM_INT */ +#define CS42L43_HP_STARTUP_DONE_INT_MASK 0x00000800 +#define CS42L43_HP_STARTUP_DONE_INT_SHIFT 11 +#define CS42L43_HP_SHUTDOWN_DONE_INT_MASK 0x00000400 +#define CS42L43_HP_SHUTDOWN_DONE_INT_SHIFT 10 +#define CS42L43_HSDET_DONE_INT_MASK 0x00000200 +#define CS42L43_HSDET_DONE_INT_SHIFT 9 +#define CS42L43_TIPSENSE_UNPLUG_DB_INT_MASK 0x00000080 +#define CS42L43_TIPSENSE_UNPLUG_DB_INT_SHIFT 7 +#define CS42L43_TIPSENSE_PLUG_DB_INT_MASK 0x00000040 +#define CS42L43_TIPSENSE_PLUG_DB_INT_SHIFT 6 +#define CS42L43_RINGSENSE_UNPLUG_DB_INT_MASK 0x00000020 +#define CS42L43_RINGSENSE_UNPLUG_DB_INT_SHIFT 5 +#define CS42L43_RINGSENSE_PLUG_DB_INT_MASK 0x00000010 +#define CS42L43_RINGSENSE_PLUG_DB_INT_SHIFT 4 +#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_MASK 0x00000008 +#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_SHIFT 3 +#define CS42L43_TIPSENSE_PLUG_PDET_INT_MASK 0x00000004 +#define CS42L43_TIPSENSE_PLUG_PDET_INT_SHIFT 2 +#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_MASK 0x00000002 +#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_SHIFT 1 +#define CS42L43_RINGSENSE_PLUG_PDET_INT_MASK 0x00000001 +#define CS42L43_RINGSENSE_PLUG_PDET_INT_SHIFT 0 + +/* CS42L43_ACC_DET_INT */ +#define CS42L43_HS2_BIAS_SENSE_INT_MASK 0x00000800 +#define CS42L43_HS2_BIAS_SENSE_INT_SHIFT 11 +#define CS42L43_HS1_BIAS_SENSE_INT_MASK 0x00000400 +#define CS42L43_HS1_BIAS_SENSE_INT_SHIFT 10 +#define CS42L43_DC_DETECT1_FALSE_INT_MASK 0x00000080 +#define CS42L43_DC_DETECT1_FALSE_INT_SHIFT 7 +#define CS42L43_DC_DETECT1_TRUE_INT_MASK 0x00000040 +#define CS42L43_DC_DETECT1_TRUE_INT_SHIFT 6 +#define CS42L43_HSBIAS_CLAMPED_INT_MASK 0x00000008 +#define CS42L43_HSBIAS_CLAMPED_INT_SHIFT 3 +#define CS42L43_HS3_4_BIAS_SENSE_INT_MASK 0x00000001 +#define CS42L43_HS3_4_BIAS_SENSE_INT_SHIFT 0 + +/* CS42L43_SPI_MSTR_INT */ +#define CS42L43_IRQ_SPI_STALLING_INT_MASK 0x00000004 +#define CS42L43_IRQ_SPI_STALLING_INT_SHIFT 2 +#define CS42L43_IRQ_SPI_STS_INT_MASK 0x00000002 +#define CS42L43_IRQ_SPI_STS_INT_SHIFT 1 +#define CS42L43_IRQ_SPI_BLOCK_INT_MASK 0x00000001 +#define CS42L43_IRQ_SPI_BLOCK_INT_SHIFT 0 + +/* CS42L43_SW_TO_SPI_BRIDGE_INT */ +#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_MASK 0x00000001 +#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_SHIFT 0 + +/* CS42L43_CLASS_D_AMP_INT */ +#define CS42L43_AMP2_CLK_STOP_FAULT_INT_MASK 0x00002000 +#define CS42L43_AMP2_CLK_STOP_FAULT_INT_SHIFT 13 +#define CS42L43_AMP1_CLK_STOP_FAULT_INT_MASK 0x00001000 +#define CS42L43_AMP1_CLK_STOP_FAULT_INT_SHIFT 12 +#define CS42L43_AMP2_VDDSPK_FAULT_INT_MASK 0x00000800 +#define CS42L43_AMP2_VDDSPK_FAULT_INT_SHIFT 11 +#define CS42L43_AMP1_VDDSPK_FAULT_INT_MASK 0x00000400 +#define CS42L43_AMP1_VDDSPK_FAULT_INT_SHIFT 10 +#define CS42L43_AMP2_SHUTDOWN_DONE_INT_MASK 0x00000200 +#define CS42L43_AMP2_SHUTDOWN_DONE_INT_SHIFT 9 +#define CS42L43_AMP1_SHUTDOWN_DONE_INT_MASK 0x00000100 +#define CS42L43_AMP1_SHUTDOWN_DONE_INT_SHIFT 8 +#define CS42L43_AMP2_STARTUP_DONE_INT_MASK 0x00000080 +#define CS42L43_AMP2_STARTUP_DONE_INT_SHIFT 7 +#define CS42L43_AMP1_STARTUP_DONE_INT_MASK 0x00000040 +#define CS42L43_AMP1_STARTUP_DONE_INT_SHIFT 6 +#define CS42L43_AMP2_THERM_SHDN_INT_MASK 0x00000020 +#define CS42L43_AMP2_THERM_SHDN_INT_SHIFT 5 +#define CS42L43_AMP1_THERM_SHDN_INT_MASK 0x00000010 +#define CS42L43_AMP1_THERM_SHDN_INT_SHIFT 4 +#define CS42L43_AMP2_THERM_WARN_INT_MASK 0x00000008 +#define CS42L43_AMP2_THERM_WARN_INT_SHIFT 3 +#define CS42L43_AMP1_THERM_WARN_INT_MASK 0x00000004 +#define CS42L43_AMP1_THERM_WARN_INT_SHIFT 2 +#define CS42L43_AMP2_SCDET_INT_MASK 0x00000002 +#define CS42L43_AMP2_SCDET_INT_SHIFT 1 +#define CS42L43_AMP1_SCDET_INT_MASK 0x00000001 +#define CS42L43_AMP1_SCDET_INT_SHIFT 0 + +/* CS42L43_GPIO_INT */ +#define CS42L43_GPIO3_FALL_INT_MASK 0x00000020 +#define CS42L43_GPIO3_FALL_INT_SHIFT 5 +#define CS42L43_GPIO3_RISE_INT_MASK 0x00000010 +#define CS42L43_GPIO3_RISE_INT_SHIFT 4 +#define CS42L43_GPIO2_FALL_INT_MASK 0x00000008 +#define CS42L43_GPIO2_FALL_INT_SHIFT 3 +#define CS42L43_GPIO2_RISE_INT_MASK 0x00000004 +#define CS42L43_GPIO2_RISE_INT_SHIFT 2 +#define CS42L43_GPIO1_FALL_INT_MASK 0x00000002 +#define CS42L43_GPIO1_FALL_INT_SHIFT 1 +#define CS42L43_GPIO1_RISE_INT_MASK 0x00000001 +#define CS42L43_GPIO1_RISE_INT_SHIFT 0 + +/* CS42L43_HPOUT_INT */ +#define CS42L43_HP_ILIMIT_INT_MASK 0x00000002 +#define CS42L43_HP_ILIMIT_INT_SHIFT 1 +#define CS42L43_HP_LOADDET_DONE_INT_MASK 0x00000001 +#define CS42L43_HP_LOADDET_DONE_INT_SHIFT 0 + +/* CS42L43_BOOT_CONTROL */ +#define CS42L43_LOCK_HW_STS_MASK 0x00000002 +#define CS42L43_LOCK_HW_STS_SHIFT 1 + +/* CS42L43_BLOCK_EN */ +#define CS42L43_MCU_EN_MASK 0x00000001 +#define CS42L43_MCU_EN_SHIFT 0 + +/* CS42L43_SHUTTER_CONTROL */ +#define CS42L43_STATUS_SPK_SHUTTER_MUTE_MASK 0x00008000 +#define CS42L43_STATUS_SPK_SHUTTER_MUTE_SHIFT 15 +#define CS42L43_SPK_SHUTTER_CFG_MASK 0x00000F00 +#define CS42L43_SPK_SHUTTER_CFG_SHIFT 8 +#define CS42L43_STATUS_MIC_SHUTTER_MUTE_MASK 0x00000080 +#define CS42L43_STATUS_MIC_SHUTTER_MUTE_SHIFT 7 +#define CS42L43_MIC_SHUTTER_CFG_MASK 0x0000000F +#define CS42L43_MIC_SHUTTER_CFG_SHIFT 0 + +/* CS42L43_MCU_SW_REV */ +#define CS42L43_BIOS_SUBMINOR_REV_MASK 0xFF000000 +#define CS42L43_BIOS_SUBMINOR_REV_SHIFT 24 +#define CS42L43_BIOS_MINOR_REV_MASK 0x00F00000 +#define CS42L43_BIOS_MINOR_REV_SHIFT 20 +#define CS42L43_BIOS_MAJOR_REV_MASK 0x000F0000 +#define CS42L43_BIOS_MAJOR_REV_SHIFT 16 +#define CS42L43_FW_SUBMINOR_REV_MASK 0x0000FF00 +#define CS42L43_FW_SUBMINOR_REV_SHIFT 8 +#define CS42L43_FW_MINOR_REV_MASK 0x000000F0 +#define CS42L43_FW_MINOR_REV_SHIFT 4 +#define CS42L43_FW_MAJOR_REV_MASK 0x0000000F +#define CS42L43_FW_MAJOR_REV_SHIFT 0 + +/* CS42L43_NEED_CONFIGS */ +#define CS42L43_FW_PATCH_NEED_CFG_MASK 0x80000000 +#define CS42L43_FW_PATCH_NEED_CFG_SHIFT 31 + +/* CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION */ +#define CS42L43_FW_MM_CTRL_MCU_SEL_MASK 0x00000001 +#define CS42L43_FW_MM_CTRL_MCU_SEL_SHIFT 0 + +/* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */ +#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL 0xF05AA50F + +#endif /* CS42L43_CORE_REGS_H */ diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h new file mode 100644 index 000000000000..2239d8585e78 --- /dev/null +++ b/include/linux/mfd/cs42l43.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CS42L43 core driver external data + * + * Copyright (C) 2022-2023 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef CS42L43_CORE_EXT_H +#define CS42L43_CORE_EXT_H + +#include <linux/completion.h> +#include <linux/mutex.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> +#include <linux/workqueue.h> + +#define CS42L43_N_SUPPLIES 3 + +struct device; +struct gpio_desc; +struct sdw_slave; + +enum cs42l43_irq_numbers { + CS42L43_PLL_LOST_LOCK, + CS42L43_PLL_READY, + + CS42L43_HP_STARTUP_DONE, + CS42L43_HP_SHUTDOWN_DONE, + CS42L43_HSDET_DONE, + CS42L43_TIPSENSE_UNPLUG_DB, + CS42L43_TIPSENSE_PLUG_DB, + CS42L43_RINGSENSE_UNPLUG_DB, + CS42L43_RINGSENSE_PLUG_DB, + CS42L43_TIPSENSE_UNPLUG_PDET, + CS42L43_TIPSENSE_PLUG_PDET, + CS42L43_RINGSENSE_UNPLUG_PDET, + CS42L43_RINGSENSE_PLUG_PDET, + + CS42L43_HS2_BIAS_SENSE, + CS42L43_HS1_BIAS_SENSE, + CS42L43_DC_DETECT1_FALSE, + CS42L43_DC_DETECT1_TRUE, + CS42L43_HSBIAS_CLAMPED, + CS42L43_HS3_4_BIAS_SENSE, + + CS42L43_AMP2_CLK_STOP_FAULT, + CS42L43_AMP1_CLK_STOP_FAULT, + CS42L43_AMP2_VDDSPK_FAULT, + CS42L43_AMP1_VDDSPK_FAULT, + CS42L43_AMP2_SHUTDOWN_DONE, + CS42L43_AMP1_SHUTDOWN_DONE, + CS42L43_AMP2_STARTUP_DONE, + CS42L43_AMP1_STARTUP_DONE, + CS42L43_AMP2_THERM_SHDN, + CS42L43_AMP1_THERM_SHDN, + CS42L43_AMP2_THERM_WARN, + CS42L43_AMP1_THERM_WARN, + CS42L43_AMP2_SCDET, + CS42L43_AMP1_SCDET, + + CS42L43_GPIO3_FALL, + CS42L43_GPIO3_RISE, + CS42L43_GPIO2_FALL, + CS42L43_GPIO2_RISE, + CS42L43_GPIO1_FALL, + CS42L43_GPIO1_RISE, + + CS42L43_HP_ILIMIT, + CS42L43_HP_LOADDET_DONE, +}; + +struct cs42l43 { + struct device *dev; + struct regmap *regmap; + struct sdw_slave *sdw; + + struct regulator *vdd_p; + struct regulator *vdd_d; + struct regulator_bulk_data core_supplies[CS42L43_N_SUPPLIES]; + + struct gpio_desc *reset; + + int irq; + struct regmap_irq_chip irq_chip; + struct regmap_irq_chip_data *irq_data; + + struct work_struct boot_work; + struct completion device_attach; + struct completion device_detach; + struct completion firmware_download; + int firmware_error; + + unsigned int sdw_freq; + /* Lock to gate control of the PLL and its sources. */ + struct mutex pll_lock; + + bool sdw_pll_active; + bool attached; + bool hw_lock; +}; + +#endif /* CS42L43_CORE_EXT_H */ diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index d3f126990ad0..137a2b067512 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -7,7 +7,6 @@ #define DA9055_MAX_REGULATORS 8 struct da9055; -struct gpio_desc; enum gpio_select { NO_GPIO = 0, @@ -24,16 +23,6 @@ struct da9055_pdata { /* Enable RTC in RESET Mode */ bool reset_enable; /* - * GPI muxed pin to control - * regulator state A/B, 0 if not available. - */ - int *gpio_ren; - /* - * GPI muxed pin to control - * regulator set, 0 if not available. - */ - int *gpio_rsel; - /* * Regulator mode control bits value (GPI offset) that * controls the regulator state, 0 if not available. */ @@ -43,7 +32,5 @@ struct da9055_pdata { * controls the regulator set A/B, 0 if not available. */ enum gpio_select *reg_rsel; - /* GPIO descriptors to enable regulator, NULL if not available */ - struct gpio_desc **ena_gpiods; }; #endif /* __DA9055_PDATA_H */ diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index e7a7e70fdb38..dd0fc891b228 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits) #define PRCMU_QOS_ARM_OPP 3 #define PRCMU_QOS_DEFAULT_VALUE -1 -static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) -{ - return 0; -} - -static inline int prcmu_qos_requirement(int prcmu_qos_class) -{ - return 0; -} - static inline int prcmu_qos_add_requirement(int prcmu_qos_class, char *name, s32 value) { @@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) { } -static inline int prcmu_qos_add_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} -static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} - #endif /* __MACH_PRCMU_H */ diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h index 6a012784dd1b..194556851ccf 100644 --- a/include/linux/mfd/hi655x-pmic.h +++ b/include/linux/mfd/hi655x-pmic.h @@ -52,7 +52,6 @@ #define OTMP_D1R_INT_MASK BIT(OTMP_D1R_INT) struct hi655x_pmic { - struct resource *res; struct device *dev; struct regmap *regmap; struct gpio_desc *gpio; diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h new file mode 100644 index 000000000000..ec11872f51ad --- /dev/null +++ b/include/linux/mfd/idtRC38xxx_reg.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21) + * + * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company. + */ +#ifndef MFD_IDTRC38XXX_REG +#define MFD_IDTRC38XXX_REG + +/* GLOBAL */ +#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */ +#define MISC_CTRL (0x14) /* Specific to FC3A */ +#define APLL_REINIT BIT(1) +#define APLL_REINIT_VFC3A BIT(2) + +#define DEVICE_ID (0x2) +#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */ +#define DEVICE_ID_SHIFT (12) + +/* FOD */ +#define FOD_0 (0x300) +#define FOD_0_VFC3A (0x400) +#define FOD_1 (0x340) +#define FOD_1_VFC3A (0x440) +#define FOD_2 (0x380) +#define FOD_2_VFC3A (0x480) + +/* TDCAPLL */ +#define TDC_CTRL (0x44a) /* Specific to FC3W */ +#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */ +#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */ +#define TDC_EN BIT(0) +#define TDC_DAC_RECAL_REQ BIT(1) +#define TDC_DAC_RECAL_REQ_VFC3A BIT(0) + +#define TDC_FB_DIV_INT_CNFG (0x442) +#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162) +#define TDC_FB_DIV_INT_MASK GENMASK(7, 0) +#define TDC_REF_DIV_CNFG (0x443) +#define TDC_REF_DIV_CNFG_VFC3A (0x163) +#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0) + +/* TIME SYNC CHANNEL */ +#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0) + +#define SUB_SYNC_GEN_CNFG (0xa04) + +#define TOD_COUNTER_READ_REQ (0xa5f) +#define TOD_COUNTER_READ_REQ_VFC3A (0x6df) +#define TOD_SYNC_LOAD_VAL_CTRL (0xa10) +#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690) +#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0) +#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0) +#define TOD_SYNC_LOAD_REQ_CTRL (0xa21) +#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1) +#define SYNC_LOAD_ENABLE BIT(1) +#define SUB_SYNC_LOAD_ENABLE BIT(0) +#define SYNC_LOAD_REQ BIT(0) + +#define LPF_MODE_CNFG (0xa80) +#define LPF_MODE_CNFG_VFC3A (0x700) +enum lpf_mode { + LPF_DISABLED = 0, + LPF_WP = 1, + LPF_HOLDOVER = 2, + LPF_WF = 3, + LPF_INVALID = 4 +}; +#define LPF_CTRL (0xa98) +#define LPF_CTRL_VFC3A (0x718) +#define LPF_EN BIT(0) + +#define LPF_BW_CNFG (0xa81) +#define LPF_BW_SHIFT GENMASK(7, 3) +#define LPF_BW_MULT GENMASK(2, 0) +#define LPF_BW_SHIFT_DEFAULT (0xb) +#define LPF_BW_MULT_DEFAULT (0x0) +#define LPF_BW_SHIFT_1PPS (0x5) + +#define LPF_WR_PHASE_CTRL (0xaa8) +#define LPF_WR_PHASE_CTRL_VFC3A (0x728) +#define LPF_WR_FREQ_CTRL (0xab0) +#define LPF_WR_FREQ_CTRL_VFC3A (0x730) + +#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00) +#define TIME_SYNC_TO_TDC_EN BIT(0) +#define SIG1_MUX_SEL_MASK GENMASK(7, 4) +#define SIG2_MUX_SEL_MASK GENMASK(11, 8) +enum tdc_mux_sel { + REF0 = 0, + REF1 = 1, + REF2 = 2, + REF3 = 3, + REF_CLK5 = 4, + REF_CLK6 = 5, + DPLL_FB_TO_TDC = 6, + DPLL_FB_DIVIDED_TO_TDC = 7, + TIME_CLK_DIVIDED = 8, + TIME_SYNC = 9, +}; + +#define TIME_CLOCK_MEAS_CNFG (0xB04) +#define TDC_MEAS_MODE BIT(0) +enum tdc_meas_mode { + CONTINUOUS = 0, + ONE_SHOT = 1, + MEAS_MODE_INVALID = 2, +}; + +#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08) +#define TIME_REF_DIV_MASK GENMASK(29, 24) + +#define TIME_CLOCK_MEAS_CTRL (0xB10) +#define TDC_MEAS_EN BIT(0) +#define TDC_MEAS_START BIT(1) + +#define TDC_FIFO_READ_REQ (0xB2F) +#define TDC_FIFO_READ (0xB30) +#define COARSE_MEAS_MASK GENMASK_ULL(39, 13) +#define FINE_MEAS_MASK GENMASK(12, 0) + +#define TDC_FIFO_CTRL (0xB12) +#define FIFO_CLEAR BIT(0) +#define TDC_FIFO_STS (0xB38) +#define FIFO_FULL BIT(1) +#define FIFO_EMPTY BIT(0) +#define TDC_FIFO_EVENT (0xB39) +#define FIFO_OVERRUN BIT(1) + +/* DPLL */ +#define MAX_REFERENCE_INDEX (3) +#define MAX_NUM_REF_PRIORITY (4) + +#define MAX_DPLL_INDEX (2) + +#define DPLL_STS (0x580) +#define DPLL_STS_VFC3A (0x571) +#define DPLL_STATE_STS_MASK (0x70) +#define DPLL_STATE_STS_SHIFT (4) +#define DPLL_REF_SEL_STS_MASK (0x6) +#define DPLL_REF_SEL_STS_SHIFT (1) + +#define DPLL_REF_PRIORITY_CNFG (0x502) +#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf) +#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31) +#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2) +#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304) +#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08) +#define DPLL_REF0_PRIORITY_SHIFT (4) +#define DPLL_REF1_PRIORITY_SHIFT (6) +#define DPLL_REF2_PRIORITY_SHIFT (8) +#define DPLL_REF3_PRIORITY_SHIFT (10) + +enum dpll_state { + DPLL_STATE_MIN = 0, + DPLL_STATE_FREERUN = DPLL_STATE_MIN, + DPLL_STATE_LOCKED = 1, + DPLL_STATE_HOLDOVER = 2, + DPLL_STATE_WRITE_FREQUENCY = 3, + DPLL_STATE_ACQUIRE = 4, + DPLL_STATE_HITLESS_SWITCH = 5, + DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH +}; + +/* REFMON */ +#define LOSMON_STS_0 (0x81e) +#define LOSMON_STS_0_VFC3A (0x18e) +#define LOSMON_STS_1 (0x82e) +#define LOSMON_STS_1_VFC3A (0x19e) +#define LOSMON_STS_2 (0x83e) +#define LOSMON_STS_2_VFC3A (0x1ae) +#define LOSMON_STS_3 (0x84e) +#define LOSMON_STS_3_VFC3A (0x1be) +#define LOS_STS_MASK (0x1) + +#define FREQMON_STS_0 (0x874) +#define FREQMON_STS_0_VFC3A (0x1d4) +#define FREQMON_STS_1 (0x894) +#define FREQMON_STS_1_VFC3A (0x1f4) +#define FREQMON_STS_2 (0x8b4) +#define FREQMON_STS_2_VFC3A (0x214) +#define FREQMON_STS_3 (0x8d4) +#define FREQMON_STS_3_VFC3A (0x234) +#define FREQ_FAIL_STS_SHIFT (31) + +/* Firmware interface */ +#define TIME_CLK_FREQ_ADDR (0xffa0) +#define XTAL_FREQ_ADDR (0xffa1) + +/* + * Return register address and field mask based on passed in firmware version + */ +#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER)) +#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? (FIELD) : (FIELD##_##VER)) +enum fw_version { + V_DEFAULT = 0, + VFC3W = 1, + VFC3A = 2 +}; + +/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */ +enum { + FREQ_MIN = 0, + FREQ_25M = 1, + FREQ_49_152M = 2, + FREQ_50M = 3, + FREQ_100M = 4, + FREQ_125M = 5, + FREQ_250M = 6, + FREQ_MAX +}; + +struct idtfc3_hw_param { + u32 xtal_freq; + u32 time_clk_freq; +}; + +struct idtfc3_fwrc { + u8 hiaddr; + u8 loaddr; + u8 value; + u8 reserved; +} __packed; + +static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param) +{ + hw_param->xtal_freq = 49152000; + hw_param->time_clk_freq = 25000000; +} + +static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param, + u16 addr, u8 val) +{ + if (addr == XTAL_FREQ_ADDR) + switch (val) { + case FREQ_49_152M: + hw_param->xtal_freq = 49152000; + break; + case FREQ_50M: + hw_param->xtal_freq = 50000000; + break; + default: + return -EINVAL; + } + else if (addr == TIME_CLK_FREQ_ADDR) + switch (val) { + case FREQ_25M: + hw_param->time_clk_freq = 25000000; + break; + case FREQ_50M: + hw_param->time_clk_freq = 50000000; + break; + case FREQ_100M: + hw_param->time_clk_freq = 100000000; + break; + case FREQ_125M: + hw_param->time_clk_freq = 125000000; + break; + case FREQ_250M: + hw_param->time_clk_freq = 250000000; + break; + default: + return -EINVAL; + } + else + return -EFAULT; + + return 0; +} + +#endif diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 3d5c480d58ea..51b47966a04d 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -10,7 +10,6 @@ #ifndef __MFD_LP8788_H__ #define __MFD_LP8788_H__ -#include <linux/gpio.h> #include <linux/irqdomain.h> #include <linux/pwm.h> #include <linux/regmap.h> @@ -159,21 +158,17 @@ struct lp8788; /* * lp8788_buck1_dvs - * @gpio : gpio pin number for dvs control * @vsel : dvs selector for buck v1 register */ struct lp8788_buck1_dvs { - int gpio; enum lp8788_dvs_sel vsel; }; /* * lp8788_buck2_dvs - * @gpio : two gpio pin numbers are used for dvs * @vsel : dvs selector for buck v2 register */ struct lp8788_buck2_dvs { - int gpio[LP8788_NUM_BUCK2_DVS]; enum lp8788_dvs_sel vsel; }; @@ -268,8 +263,8 @@ struct lp8788_vib_platform_data { * @buck_data : regulator initial data for buck * @dldo_data : regulator initial data for digital ldo * @aldo_data : regulator initial data for analog ldo - * @buck1_dvs : gpio configurations for buck1 dvs - * @buck2_dvs : gpio configurations for buck2 dvs + * @buck1_dvs : configurations for buck1 dvs + * @buck2_dvs : configurations for buck2 dvs * @chg_pdata : platform data for charger driver * @alarm_sel : rtc alarm selection (1 or 2) * @bl_pdata : configurable data for backlight driver diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index ea4a4b1b246a..1fbda1f8967d 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -15,7 +15,7 @@ #define ICH_RES_GPE0 1 /* GPIO compatibility */ -enum { +enum lpc_gpio_versions { ICH_I3100_GPIO, ICH_V5_GPIO, ICH_V6_GPIO, @@ -26,11 +26,14 @@ enum { AVOTON_GPIO, }; +struct lpc_ich_gpio_info; + struct lpc_ich_info { char name[32]; unsigned int iTCO_version; - unsigned int gpio_version; + enum lpc_gpio_versions gpio_version; enum intel_spi_type spi_type; + const struct lpc_ich_gpio_info *gpio_info; u8 use_gpio; }; diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 3acceeedbaba..ea635d12a741 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -441,8 +441,4 @@ enum max77686_types { TYPE_MAX77802, }; -extern int max77686_irq_init(struct max77686_dev *max77686); -extern void max77686_irq_exit(struct max77686_dev *max77686); -extern int max77686_irq_resume(struct max77686_dev *max77686); - #endif /* __LINUX_MFD_MAX77686_PRIV_H */ diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 311f7d3d2323..54444ff2a5de 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -405,7 +405,7 @@ enum max77693_haptic_reg { MAX77693_HAPTIC_REG_END, }; -/* max77693-pmic LSCNFG configuraton register */ +/* max77693-pmic LSCNFG configuration register */ #define MAX77693_PMIC_LOW_SYS_MASK 0x80 #define MAX77693_PMIC_LOW_SYS_SHIFT 7 diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index 0bc7454c4dbe..2fb4db67f110 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -198,7 +198,7 @@ enum max77843_irq_muic { #define MAX77843_MCONFIG_MEN_MASK BIT(MCONFIG_MEN_SHIFT) #define MAX77843_MCONFIG_PDIV_MASK (0x3 << MCONFIG_PDIV_SHIFT) -/* Max77843 charger insterrupts */ +/* Max77843 charger interrupts */ #define MAX77843_CHG_BYP_I BIT(0) #define MAX77843_CHG_BATP_I BIT(2) #define MAX77843_CHG_BAT_I BIT(3) diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 6193905abbb5..5c2cc1103437 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -178,7 +178,6 @@ struct max8997_platform_data { * */ bool ignore_gpiodvs_side_effect; - int buck125_gpios[3]; /* GPIO of [0]SET1, [1]SET2, [2]SET3 */ int buck125_default_idx; /* Default value of SET1, 2, 3 */ unsigned int buck1_voltage[8]; /* buckx_voltage in uV */ bool buck1_gpiodvs; diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index 79c020bd0c70..a054e55c8646 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -65,10 +65,7 @@ struct max8998_regulator_data { * be other than the preset values. * @buck1_voltage: BUCK1 DVS mode 1 voltage registers * @buck2_voltage: BUCK2 DVS mode 2 voltage registers - * @buck1_set1: BUCK1 gpio pin 1 to set output voltage - * @buck1_set2: BUCK1 gpio pin 2 to set output voltage * @buck1_default_idx: Default for BUCK1 gpio pin 1, 2 - * @buck2_set3: BUCK2 gpio pin to set output voltage * @buck2_default_idx: Default for BUCK2 gpio pin. * @wakeup: Allow to wake up from suspend * @rtc_delay: LP3974 RTC chip bug that requires delay after a register @@ -91,10 +88,7 @@ struct max8998_platform_data { bool buck_voltage_lock; int buck1_voltage[4]; int buck2_voltage[2]; - int buck1_set1; - int buck1_set2; int buck1_default_idx; - int buck2_set3; int buck2_default_idx; bool wakeup; bool rtc_delay; diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 3d33517f178c..d83e87298ac4 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -262,6 +262,12 @@ #define MT6358_LDO_VBIF28_CON3 0x1db0 #define MT6358_VCAMA1_ANA_CON0 0x1e08 #define MT6358_VCAMA2_ANA_CON0 0x1e0c +#define MT6358_VFE28_ANA_CON0 0x1e10 +#define MT6358_VCN28_ANA_CON0 0x1e14 +#define MT6358_VBIF28_ANA_CON0 0x1e18 +#define MT6358_VAUD28_ANA_CON0 0x1e1c +#define MT6358_VAUX18_ANA_CON0 0x1e20 +#define MT6358_VXO22_ANA_CON0 0x1e24 #define MT6358_VCN33_ANA_CON0 0x1e28 #define MT6358_VSIM1_ANA_CON0 0x1e2c #define MT6358_VSIM2_ANA_CON0 0x1e30 @@ -288,4 +294,21 @@ #define MT6358_AUD_TOP_INT_CON0 0x2228 #define MT6358_AUD_TOP_INT_STATUS0 0x2234 +/* + * MT6366 has no VCAM*, but has other regulators in its place. The names + * keep the MT6358 prefix for ease of use in the regulator driver. + */ +#define MT6358_LDO_VSRAM_CON5 0x1bf8 +#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0 +#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1 +#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2 +#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0 +#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1 +#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2 +#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0 +#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6 +#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8 +#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0 +#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0 + #endif /* __MFD_MT6358_REGISTERS_H__ */ diff --git a/include/linux/mfd/rz-mtu3.h b/include/linux/mfd/rz-mtu3.h index c5173bc06270..8421d49500bf 100644 --- a/include/linux/mfd/rz-mtu3.h +++ b/include/linux/mfd/rz-mtu3.h @@ -151,7 +151,6 @@ struct rz_mtu3 { void *priv_data; }; -#if IS_ENABLED(CONFIG_RZ_MTU3) static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) { mutex_lock(&ch->lock); @@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val); void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val); void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off, u16 pos, u8 val); -#else -static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch) -{ -} - -static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch) -{ -} - -static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch) -{ - return 0; -} - -static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 val) -{ -} - -static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val) -{ -} - -static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, - u16 off, u16 pos, u8 val) -{ -} -#endif #endif /* __MFD_RZ_MTU3_H__ */ diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h index 18363b773d07..cb99e16ca947 100644 --- a/include/linux/mfd/si476x-platform.h +++ b/include/linux/mfd/si476x-platform.h @@ -10,7 +10,7 @@ #ifndef __SI476X_PLATFORM_H__ #define __SI476X_PLATFORM_H__ -/* It is possible to select one of the four adresses using pins A0 +/* It is possible to select one of the four addresses using pins A0 * and A1 on SI476x */ #define SI476X_I2C_ADDR_1 0x60 #define SI476X_I2C_ADDR_2 0x61 diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 1b94325febb3..ca35af30745f 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -102,6 +102,15 @@ enum stm32_timers_dmas { STM32_TIMERS_MAX_DMAS, }; +/* STM32 Timer may have either a unique global interrupt or 4 interrupt lines */ +enum stm32_timers_irqs { + STM32_TIMERS_IRQ_GLOBAL_BRK, /* global or brk IRQ */ + STM32_TIMERS_IRQ_UP, + STM32_TIMERS_IRQ_TRG_COM, + STM32_TIMERS_IRQ_CC, + STM32_TIMERS_MAX_IRQS, +}; + /** * struct stm32_timers_dma - STM32 timer DMA handling. * @completion: end of DMA transfer completion @@ -123,6 +132,8 @@ struct stm32_timers { struct regmap *regmap; u32 max_arr; struct stm32_timers_dma dma; /* Only to be used by the parent */ + unsigned int nr_irqs; + int irq[STM32_TIMERS_MAX_IRQS]; }; #if IS_REACHABLE(CONFIG_MFD_STM32_TIMERS) diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index ea0ccf33a459..021f820f9d52 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -81,8 +81,8 @@ #define SUN4I_GPADC_TEMP_DATA 0x20 #define SUN4I_GPADC_DATA 0x24 -#define SUN4I_GPADC_IRQ_FIFO_DATA 0 -#define SUN4I_GPADC_IRQ_TEMP_DATA 1 +#define SUN4I_GPADC_IRQ_FIFO_DATA 1 +#define SUN4I_GPADC_IRQ_TEMP_DATA 2 /* 10s delay before suspending the IP */ #define SUN4I_GPADC_AUTOSUSPEND_DELAY 10000 diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index fecc2fa2a364..c315903f6dab 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -17,20 +17,17 @@ struct device_node; #ifdef CONFIG_MFD_SYSCON -extern struct regmap *device_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s); -extern struct regmap *syscon_regmap_lookup_by_phandle( - struct device_node *np, - const char *property); -extern struct regmap *syscon_regmap_lookup_by_phandle_args( - struct device_node *np, - const char *property, - int arg_count, - unsigned int *out_args); -extern struct regmap *syscon_regmap_lookup_by_phandle_optional( - struct device_node *np, - const char *property); +struct regmap *device_node_to_regmap(struct device_node *np); +struct regmap *syscon_node_to_regmap(struct device_node *np); +struct regmap *syscon_regmap_lookup_by_compatible(const char *s); +struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, + const char *property); +struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np, + const char *property, + int arg_count, + unsigned int *out_args); +struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np, + const char *property); #else static inline struct regmap *device_node_to_regmap(struct device_node *np) { diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h index 16f87cccc003..9185b5cd8371 100644 --- a/include/linux/mfd/tps65086.h +++ b/include/linux/mfd/tps65086.h @@ -13,8 +13,9 @@ #include <linux/regmap.h> /* List of registers for TPS65086 */ -#define TPS65086_DEVICEID 0x01 -#define TPS65086_IRQ 0x02 +#define TPS65086_DEVICEID1 0x00 +#define TPS65086_DEVICEID2 0x01 +#define TPS65086_IRQ 0x02 #define TPS65086_IRQ_MASK 0x03 #define TPS65086_PMICSTAT 0x04 #define TPS65086_SHUTDNSRC 0x05 @@ -75,10 +76,16 @@ #define TPS65086_IRQ_SHUTDN_MASK BIT(3) #define TPS65086_IRQ_FAULT_MASK BIT(7) -/* DEVICEID Register field definitions */ -#define TPS65086_DEVICEID_PART_MASK GENMASK(3, 0) -#define TPS65086_DEVICEID_OTP_MASK GENMASK(5, 4) -#define TPS65086_DEVICEID_REV_MASK GENMASK(7, 6) +/* DEVICEID1 Register field definitions */ +#define TPS6508640_ID 0x00 +#define TPS65086401_ID 0x01 +#define TPS6508641_ID 0x10 +#define TPS65086470_ID 0x70 + +/* DEVICEID2 Register field definitions */ +#define TPS65086_DEVICEID2_PART_MASK GENMASK(3, 0) +#define TPS65086_DEVICEID2_OTP_MASK GENMASK(5, 4) +#define TPS65086_DEVICEID2_REV_MASK GENMASK(7, 6) /* VID Masks */ #define BUCK_VID_MASK GENMASK(7, 1) @@ -92,6 +99,8 @@ enum tps65086_irqs { TPS65086_IRQ_FAULT, }; +struct tps65086_regulator_config; + /** * struct tps65086 - state holder for the tps65086 driver * @@ -100,6 +109,8 @@ enum tps65086_irqs { struct tps65086 { struct device *dev; struct regmap *regmap; + unsigned int chip_id; + const struct tps65086_regulator_config *reg_config; /* IRQ Data */ int irq; diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 701925db75b3..f67ef0a4e041 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -749,7 +749,7 @@ #define VDDCTRL_ST_SHIFT 0 -/*Register VDDCTRL_OP (0x28) bit definitios */ +/*Register VDDCTRL_OP (0x28) bit definitions */ #define VDDCTRL_OP_CMD_MASK 0x80 #define VDDCTRL_OP_CMD_SHIFT 7 #define VDDCTRL_OP_SEL_MASK 0x7F diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index c062d91a67d9..85dc406173db 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -461,6 +461,7 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 +#define TWL6030_PHOENIX_DEV_ON 0x06 /*----------------------------------------------------------------------*/ /* Power bus message definitions */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index f6de4b6ecfc7..77b8c0a26674 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -266,6 +266,7 @@ struct mhi_event_config { * struct mhi_controller_config - Root MHI controller configuration * @max_channels: Maximum number of channels supported * @timeout_ms: Timeout value for operations. 0 means use default + * @ready_timeout_ms: Timeout value for waiting device to be ready (optional) * @buf_len: Size of automatically allocated buffers. 0 means use default * @num_channels: Number of channels defined in @ch_cfg * @ch_cfg: Array of defined channels @@ -277,6 +278,7 @@ struct mhi_event_config { struct mhi_controller_config { u32 max_channels; u32 timeout_ms; + u32 ready_timeout_ms; u32 buf_len; u32 num_channels; const struct mhi_channel_config *ch_cfg; @@ -299,6 +301,10 @@ struct mhi_controller_config { * @iova_start: IOMMU starting address for data (required) * @iova_stop: IOMMU stop address for data (required) * @fw_image: Firmware image name for normal booting (optional) + * @fw_data: Firmware image data content for normal booting, used only + * if fw_image is NULL and fbc_download is true (optional) + * @fw_sz: Firmware image data size for normal booting, used only if fw_image + * is NULL and fbc_download is true (optional) * @edl_image: Firmware image name for emergency download mode (optional) * @rddm_size: RAM dump size that host should allocate for debugging purpose * @sbl_size: SBL image size downloaded through BHIe (optional) @@ -314,18 +320,14 @@ struct mhi_controller_config { * @hw_ev_rings: Number of hardware event rings * @sw_ev_rings: Number of software event rings * @nr_irqs: Number of IRQ allocated by bus master (required) - * @family_number: MHI controller family number - * @device_number: MHI controller device number - * @major_version: MHI controller major revision number - * @minor_version: MHI controller minor revision number * @serial_number: MHI controller serial number obtained from BHI - * @oem_pk_hash: MHI controller OEM PK Hash obtained from BHI * @mhi_event: MHI event ring configurations table * @mhi_cmd: MHI command ring configurations table * @mhi_ctxt: MHI device context, shared memory between host and device * @pm_mutex: Mutex for suspend/resume operation * @pm_lock: Lock for protecting MHI power management state * @timeout_ms: Timeout in ms for state transitions + * @ready_timeout_ms: Timeout in ms for waiting device to be ready (optional) * @pm_state: MHI power management state * @db_access: DB access states * @ee: MHI device execution environment @@ -362,15 +364,6 @@ struct mhi_controller_config { * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). For the fields marked as (optional) * they can be populated depending on the usecase. - * - * The following fields are present for the purpose of implementing any device - * specific quirks or customizations for specific MHI revisions used in device - * by the controller drivers. The MHI stack will just populate these fields - * during mhi_register_controller(): - * family_number - * device_number - * major_version - * minor_version */ struct mhi_controller { struct device *cntrl_dev; @@ -384,6 +377,8 @@ struct mhi_controller { dma_addr_t iova_start; dma_addr_t iova_stop; const char *fw_image; + const u8 *fw_data; + size_t fw_sz; const char *edl_image; size_t rddm_size; size_t sbl_size; @@ -399,12 +394,7 @@ struct mhi_controller { u32 hw_ev_rings; u32 sw_ev_rings; u32 nr_irqs; - u32 family_number; - u32 device_number; - u32 major_version; - u32 minor_version; u32 serial_number; - u32 oem_pk_hash[MHI_MAX_OEM_PK_HASH_SEGMENTS]; struct mhi_event *mhi_event; struct mhi_cmd *mhi_cmd; @@ -413,6 +403,7 @@ struct mhi_controller { struct mutex pm_mutex; rwlock_t pm_lock; u32 timeout_ms; + u32 ready_timeout_ms; u32 pm_state; u32 db_access; enum mhi_ee_type ee; diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h index f198a8ac7ee7..11bf3212f782 100644 --- a/include/linux/mhi_ep.h +++ b/include/linux/mhi_ep.h @@ -50,6 +50,27 @@ struct mhi_ep_db_info { }; /** + * struct mhi_ep_buf_info - MHI Endpoint transfer buffer info + * @mhi_dev: MHI device associated with this buffer + * @dev_addr: Address of the buffer in endpoint + * @host_addr: Address of the bufffer in host + * @size: Size of the buffer + * @code: Transfer completion code + * @cb: Callback to be executed by controller drivers after transfer completion (async) + * @cb_buf: Opaque buffer to be passed to the callback + */ +struct mhi_ep_buf_info { + struct mhi_ep_device *mhi_dev; + void *dev_addr; + u64 host_addr; + size_t size; + int code; + + void (*cb)(struct mhi_ep_buf_info *buf_info); + void *cb_buf; +}; + +/** * struct mhi_ep_cntrl - MHI Endpoint controller structure * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI * Endpoint controller @@ -82,8 +103,10 @@ struct mhi_ep_db_info { * @raise_irq: CB function for raising IRQ to the host * @alloc_map: CB function for allocating memory in endpoint for storing host context and mapping it * @unmap_free: CB function to unmap and free the allocated memory in endpoint for storing host context - * @read_from_host: CB function for reading from host memory from endpoint - * @write_to_host: CB function for writing to host memory from endpoint + * @read_sync: CB function for reading from host memory synchronously + * @write_sync: CB function for writing to host memory synchronously + * @read_async: CB function for reading from host memory asynchronously + * @write_async: CB function for writing to host memory asynchronously * @mhi_state: MHI Endpoint state * @max_chan: Maximum channels supported by the endpoint controller * @mru: MRU (Maximum Receive Unit) value of the endpoint controller @@ -128,14 +151,19 @@ struct mhi_ep_cntrl { struct work_struct reset_work; struct work_struct cmd_ring_work; struct work_struct ch_ring_work; + struct kmem_cache *ring_item_cache; + struct kmem_cache *ev_ring_el_cache; + struct kmem_cache *tre_buf_cache; void (*raise_irq)(struct mhi_ep_cntrl *mhi_cntrl, u32 vector); int (*alloc_map)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t *phys_ptr, void __iomem **virt, size_t size); void (*unmap_free)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t phys, void __iomem *virt, size_t size); - int (*read_from_host)(struct mhi_ep_cntrl *mhi_cntrl, u64 from, void *to, size_t size); - int (*write_to_host)(struct mhi_ep_cntrl *mhi_cntrl, void *from, u64 to, size_t size); + int (*read_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*write_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*read_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); + int (*write_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info); enum mhi_state mhi_state; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 8bef1ab62bba..591bf5b5e8dc 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -41,9 +41,10 @@ #define PHY_ID_KSZ9477 0x00221631 /* struct phy_device dev_flags definitions */ -#define MICREL_PHY_50MHZ_CLK 0x00000001 -#define MICREL_PHY_FXEN 0x00000002 -#define MICREL_KSZ8_P1_ERRATA 0x00000003 +#define MICREL_PHY_50MHZ_CLK BIT(0) +#define MICREL_PHY_FXEN BIT(1) +#define MICREL_KSZ8_P1_ERRATA BIT(2) +#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC @@ -63,6 +64,10 @@ #define KSZ886X_BMCR_DISABLE_TRANSMIT BIT(1) #define KSZ886X_BMCR_DISABLE_LED BIT(0) +/* PHY Special Control/Status Register (Reg 31) */ #define KSZ886X_CTRL_MDIX_STAT BIT(4) +#define KSZ886X_CTRL_FORCE_LINK BIT(3) +#define KSZ886X_CTRL_PWRSAVE BIT(2) +#define KSZ886X_CTRL_REMOTE_LOOPBACK BIT(1) #endif /* _MICREL_PHY_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561..2ce13e8a309b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index fa940bbaf8ae..26b04f73f214 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/net_tstamp.h> struct phy_device; @@ -51,7 +52,8 @@ struct mii_timestamper { struct sk_buff *skb, int type); int (*hwtstamp)(struct mii_timestamper *mii_ts, - struct ifreq *ifreq); + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); void (*link_state)(struct mii_timestamper *mii_ts, struct phy_device *phydev); diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 44077837385f..d52daf45861b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -35,31 +35,33 @@ static __always_inline void min_heapify(struct min_heap *heap, int pos, const struct min_heap_callbacks *func) { - void *left, *right, *parent, *smallest; + void *left, *right; void *data = heap->data; + void *root = data + pos * func->elem_size; + int i = pos, j; + /* Find the sift-down path all the way to the leaves. */ for (;;) { - if (pos * 2 + 1 >= heap->nr) + if (i * 2 + 2 >= heap->nr) break; + left = data + (i * 2 + 1) * func->elem_size; + right = data + (i * 2 + 2) * func->elem_size; + i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; + } - left = data + ((pos * 2 + 1) * func->elem_size); - parent = data + (pos * func->elem_size); - smallest = parent; - if (func->less(left, smallest)) - smallest = left; - - if (pos * 2 + 2 < heap->nr) { - right = data + ((pos * 2 + 2) * func->elem_size); - if (func->less(right, smallest)) - smallest = right; - } - if (smallest == parent) - break; - func->swp(smallest, parent); - if (smallest == left) - pos = (pos * 2) + 1; - else - pos = (pos * 2) + 2; + /* Special case for the last leaf with no sibling. */ + if (i * 2 + 2 == heap->nr) + i = i * 2 + 1; + + /* Backtrack to the correct location. */ + while (i != pos && func->less(root, data + i * func->elem_size)) + i = (i - 1) / 2; + + /* Shift the element into its correct place. */ + j = i; + while (i != pos) { + i = (i - 1) / 2; + func->swp(data + i * func->elem_size, data + j * func->elem_size); } } @@ -70,7 +72,7 @@ void min_heapify_all(struct min_heap *heap, { int i; - for (i = heap->nr / 2; i >= 0; i--) + for (i = heap->nr / 2 - 1; i >= 0; i--) min_heapify(heap, i, func); } diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 396df1121bff..2ec559284a9f 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,59 +2,77 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/const.h> +#include <linux/types.h> /* * min()/max()/clamp() macros must accomplish three things: * - * - avoid multiple evaluations of the arguments (so side-effects like + * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only + * - Retain result as a constant expressions when called with only * constant expressions (to avoid tripping VLA warnings in stack * allocation usage). + * - Perform signed v unsigned type-checking (to generate compile + * errors instead of nasty runtime surprises). + * - Unsigned char/short are always promoted to signed int and can be + * compared against signed or unsigned arguments. + * - Unsigned arguments can be compared against non-negative signed constants. + * - Comparison of a signed argument against an unsigned constant fails + * even if the constant is below __INT_MAX__ and could be cast to int. */ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) +/* is_signed_type() isn't a constexpr for pointer types */ +#define __is_signed(x) \ + __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ + is_signed_type(typeof(x)), 0) -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) +/* True for a non-negative signed int constant */ +#define __is_noneg_int(x) \ + (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) +#define __types_ok(x, y) \ + (__is_signed(x) == __is_signed(y) || \ + __is_signed((x) + 0) == __is_signed((y) + 0) || \ + __is_noneg_int(x) || __is_noneg_int(y)) -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) +#define __cmp_op_min < +#define __cmp_op_max > -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) + +#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + static_assert(__types_ok(x, y), \ + #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ + __cmp(op, unique_x, unique_y); }) + +#define __careful_cmp(op, x, y) \ + __builtin_choose_expr(__is_constexpr((x) - (y)), \ + __cmp(op, x, y), \ + __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ - __clamp(unique_val, unique_lo, unique_hi); }) - -#define __clamp_input_check(lo, hi) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((lo) > (hi)), (lo) > (hi), false))) +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ + (lo) <= (hi), true), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ + static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ + __clamp(unique_val, unique_lo, unique_hi); }) #define __careful_clamp(val, lo, hi) ({ \ - __clamp_input_check(lo, hi) + \ - __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ - __typecheck(hi, lo) && __is_constexpr(val) && \ - __is_constexpr(lo) && __is_constexpr(hi), \ + __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ __clamp(val, lo, hi), \ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) @@ -64,14 +82,31 @@ * @x: first value * @y: second value */ -#define min(x, y) __careful_cmp(x, y, <) +#define min(x, y) __careful_cmp(min, x, y) /** * max - return maximum of two values of the same or compatible types * @x: first value * @y: second value */ -#define max(x, y) __careful_cmp(x, y, >) +#define max(x, y) __careful_cmp(max, x, y) + +/** + * umin - return minimum of two non-negative values + * Signed types are zero extended to match a larger unsigned type. + * @x: first value + * @y: second value + */ +#define umin(x, y) \ + __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) + +/** + * umax - return maximum of two non-negative values + * @x: first value + * @y: second value + */ +#define umax(x, y) \ + __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) /** * min3 - return minimum of three values @@ -123,7 +158,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) +#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) /** * max_t - return maximum of two values, using the specified type @@ -131,7 +166,50 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) +#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) + +/* + * Do not check the array parameter using __must_be_array(). + * In the following legit use-case where the "array" passed is a simple pointer, + * __must_be_array() will return a failure. + * --- 8< --- + * int *buff + * ... + * min = min_array(buff, nb_items); + * --- 8< --- + * + * The first typeof(&(array)[0]) is needed in order to support arrays of both + * 'int *buff' and 'int buff[N]' types. + * + * The array can be an array of const items. + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order + * to discard the const qualifier for the __element variable. + */ +#define __minmax_array(op, array, len) ({ \ + typeof(&(array)[0]) __array = (array); \ + typeof(len) __len = (len); \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ + while (__len--) \ + __element = op(__element, __array[__len]); \ + __element; }) + +/** + * min_array - return minimum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define min_array(array, len) __minmax_array(min, array, len) + +/** + * max_array - return maximum of values present in an array + * @array: array + * @len: array length + * + * Note that @len must not be zero (empty array). + */ +#define max_array(array, len) __minmax_array(max, array, len) /** * clamp_t - return a value clamped to a given range using a given type @@ -158,6 +236,32 @@ */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) +static inline bool in_range64(u64 val, u64 start, u64 len) +{ + return (val - start) < len; +} + +static inline bool in_range32(u32 val, u32 start, u32 len) +{ + return (val - start) < len; +} + +/** + * in_range - Determine if a value lies within a range. + * @val: Value to test. + * @start: First value in range. + * @len: Number of values in range. + * + * This is more efficient than "if (start <= val && val < (start + len))". + * It also gives a different answer if @start + @len overflows the size of + * the type by a sufficient amount to encompass @val. Decide for yourself + * which behaviour you want, or prove that start + len never overflow. + * Do not blindly replace one form with the other. + */ +#define in_range(val, start, len) \ + ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \ + in_range32(val, start, len) : in_range64(val, start, len)) + /** * swap - swap values of @a and @b * @a: first value diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index c238207d1615..e799b1f8d05b 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -31,17 +31,18 @@ struct misc_cg; * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. * @usage: Current usage of the resource. - * @failed: True if charged failed for the resource in a cgroup. + * @events: Number of times, the resource limit exceeded. */ struct misc_res { - unsigned long max; - atomic_long_t usage; - atomic_long_t events; + u64 max; + atomic64_t usage; + atomic64_t events; }; /** * struct misc_cg - Miscellaneous controller's cgroup structure. * @css: cgroup subsys state object. + * @events_file: Handle for the misc resources events file. * @res: Array of misc resources usage in the cgroup. */ struct misc_cg { @@ -53,12 +54,10 @@ struct misc_cg { struct misc_res res[MISC_CG_RES_TYPES]; }; -unsigned long misc_cg_res_total_usage(enum misc_res_type type); -int misc_cg_set_capacity(enum misc_res_type type, unsigned long capacity); -int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount); -void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount); +u64 misc_cg_res_total_usage(enum misc_res_type type); +int misc_cg_set_capacity(enum misc_res_type type, u64 capacity); +int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount); +void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount); /** * css_misc() - Get misc cgroup from the css. @@ -99,27 +98,26 @@ static inline void put_misc_cg(struct misc_cg *cg) #else /* !CONFIG_CGROUP_MISC */ -static inline unsigned long misc_cg_res_total_usage(enum misc_res_type type) +static inline u64 misc_cg_res_total_usage(enum misc_res_type type) { return 0; } -static inline int misc_cg_set_capacity(enum misc_res_type type, - unsigned long capacity) +static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) { return 0; } static inline int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount) + u64 amount) { return 0; } static inline void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, - unsigned long amount) + u64 amount) { } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6646634a0b9d..27f42f713c89 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -33,6 +33,7 @@ #ifndef MLX4_DEVICE_H #define MLX4_DEVICE_H +#include <linux/auxiliary_bus.h> #include <linux/if_ether.h> #include <linux/pci.h> #include <linux/completion.h> @@ -889,6 +890,12 @@ struct mlx4_dev { u8 uar_page_shift; }; +struct mlx4_adev { + struct auxiliary_device adev; + struct mlx4_dev *mdev; + int idx; +}; + struct mlx4_clock_params { u64 offset; u8 bar; @@ -1087,6 +1094,19 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) (offset & (PAGE_SIZE - 1)); } +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) +{ + return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); +} + +int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1, + u8 v2p_p2); + int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 1834c8fad12e..69825223081f 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -34,8 +34,12 @@ #define MLX4_DRIVER_H #include <net/devlink.h> +#include <linux/auxiliary_bus.h> +#include <linux/notifier.h> #include <linux/mlx4/device.h> +#define MLX4_ADEV_NAME "mlx4_core" + struct mlx4_dev; #define MLX4_MAC_MASK 0xffffffffffffULL @@ -54,41 +58,19 @@ enum { MLX4_INTFF_BONDING = 1 << 0 }; -struct mlx4_interface { - void * (*add) (struct mlx4_dev *dev); - void (*remove)(struct mlx4_dev *dev, void *context); - void (*event) (struct mlx4_dev *dev, void *context, - enum mlx4_dev_event event, unsigned long param); - void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); - void (*activate)(struct mlx4_dev *dev, void *context); - struct list_head list; +struct mlx4_adrv { + struct auxiliary_driver adrv; enum mlx4_protocol protocol; int flags; }; -int mlx4_register_interface(struct mlx4_interface *intf); -void mlx4_unregister_interface(struct mlx4_interface *intf); - -int mlx4_bond(struct mlx4_dev *dev); -int mlx4_unbond(struct mlx4_dev *dev); -static inline int mlx4_is_bonded(struct mlx4_dev *dev) -{ - return !!(dev->flags & MLX4_FLAG_BONDED); -} - -static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) -{ - return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); -} - -struct mlx4_port_map { - u8 port1; - u8 port2; -}; - -int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); +int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv); +void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv); -void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); +int mlx4_register_event_notifier(struct mlx4_dev *dev, + struct notifier_block *nb); +int mlx4_unregister_event_notifier(struct mlx4_dev *dev, + struct notifier_block *nb); struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 80cc12a9a531..01275c6e8468 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -364,6 +364,11 @@ enum mlx5_event { enum mlx5_driver_event { MLX5_DRIVER_EVENT_TYPE_TRAP = 0, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, + MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { @@ -913,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe) return (cqe->tls_outer_l3_tunneled >> 3) & 0x3; } -static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) +static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe) { return cqe->l4_l3_hdr_type & 0x1; } @@ -1208,9 +1213,7 @@ enum mlx5_cap_type { MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, - MLX5_CAP_RESERVED, - MLX5_CAP_VECTOR_CALC, - MLX5_CAP_QOS, + MLX5_CAP_QOS = 0xc, MLX5_CAP_DEBUG, MLX5_CAP_RESERVED_14, MLX5_CAP_DEV_MEM, @@ -1220,7 +1223,6 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, - MLX5_CAP_DEV_SHAMPO = 0x1d, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, @@ -1239,7 +1241,6 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, - MLX5_MCAM_REGS_0x9080_0x90FF = 0x1, MLX5_MCAM_REGS_0x9100_0x917F = 0x2, MLX5_MCAM_REGS_NUM = 0x3, }; @@ -1279,10 +1280,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap) -#define MLX5_CAP_ETH_MAX(mdev, cap) \ - MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap) - #define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap) @@ -1305,77 +1302,40 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP64_FLOWTABLE(mdev, cap) \ MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap) - #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) -#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) - #define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap) -#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) - #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) -#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap) - #define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap) -#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) - #define MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_rdma.cap) -#define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap) - #define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap) -#define MLX5_CAP_FLOWTABLE_RDMA_TX_MAX(mdev, cap) \ - MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_rdma.cap) - #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_eswitch_cap, \ - mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap) - #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) -#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) - #define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) -#define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) - #define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) -#define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) - #define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) -#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2_MAX(mdev, cap) \ - MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, ft_field_support_2_esw_fdb.cap) - #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) @@ -1384,10 +1344,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(flow_table_eswitch_cap, \ (mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) -#define MLX5_CAP_ESW_MAX(mdev, cap) \ - MLX5_GET(e_switch_cap, \ - mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap) - #define MLX5_CAP_PORT_SELECTION(mdev, cap) \ MLX5_GET(port_selection_cap, \ mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap) @@ -1400,26 +1356,15 @@ enum mlx5_qcam_feature_groups { MLX5_GET(adv_virtualization_cap, \ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) -#define MLX5_CAP_ADV_VIRTUALIZATION_MAX(mdev, cap) \ - MLX5_GET(adv_virtualization_cap, \ - mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->max, cap) - #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) -#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \ - MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap) - #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) -#define MLX5_CAP_VECTOR_CALC(mdev, cap) \ - MLX5_GET(vector_calc_cap, \ - mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap) - #define MLX5_CAP_QOS(mdev, cap)\ MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap) @@ -1436,10 +1381,6 @@ enum mlx5_qcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_FIRST_128], \ mng_access_reg_cap_mask.access_regs.reg) -#define MLX5_CAP_MCAM_REG1(mdev, reg) \ - MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9080_0x90FF], \ - mng_access_reg_cap_mask.access_regs1.reg) - #define MLX5_CAP_MCAM_REG2(mdev, reg) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ mng_access_reg_cap_mask.access_regs2.reg) @@ -1485,9 +1426,6 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_CRYPTO(mdev, cap)\ MLX5_GET(crypto_cap, (mdev)->caps.hca[MLX5_CAP_CRYPTO]->cur, cap) -#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ - MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) - #define MLX5_CAP_MACSEC(mdev, cap)\ MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 25d0528f9219..bf9324a31ae9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -134,6 +134,7 @@ enum { MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MTCAP = 0x9009, MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, @@ -149,11 +150,14 @@ enum { MLX5_REG_MTPPSE = 0x9054, MLX5_REG_MTUTC = 0x9055, MLX5_REG_MPEGC = 0x9056, + MLX5_REG_MPIR = 0x9059, MLX5_REG_MCQS = 0x9060, MLX5_REG_MCQI = 0x9061, MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, @@ -287,18 +291,23 @@ struct mlx5_cmd_stats { struct mlx5_cmd { struct mlx5_nb nb; + /* members which needs to be queried or reinitialized each reload */ + struct { + u16 cmdif_rev; + u8 log_sz; + u8 log_stride; + int max_reg_cmds; + unsigned long bitmask; + struct semaphore sem; + struct semaphore pages_sem; + struct semaphore throttle_sem; + } vars; enum mlx5_cmdif_state state; void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; void *cmd_buf; dma_addr_t dma; - u16 cmdif_rev; - u8 log_sz; - u8 log_stride; - int max_reg_cmds; - int events; - u32 __iomem *vector; /* protect command queue allocations */ @@ -308,12 +317,8 @@ struct mlx5_cmd { */ spinlock_t token_lock; u8 token; - unsigned long bitmask; char wq_name[MLX5_CMD_WQ_MAX_NAME]; struct workqueue_struct *wq; - struct semaphore sem; - struct semaphore pages_sem; - struct semaphore throttle_sem; int mode; u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; @@ -321,7 +326,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; + struct xarray stats; }; struct mlx5_cmd_mailbox { @@ -501,7 +506,7 @@ struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; -struct mlx5_devcom; +struct mlx5_devcom_dev; struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; @@ -611,6 +616,7 @@ struct mlx5_priv { int adev_idx; int sw_vhca_id; struct mlx5_events *events; + struct mlx5_vhca_events *vhca_events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -618,7 +624,8 @@ struct mlx5_priv { struct mlx5_core_sriov sriov; struct mlx5_lag *lag; u32 flags; - struct mlx5_devcom *devcom; + struct mlx5_devcom_dev *devc; + struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; @@ -672,6 +679,9 @@ struct mlx5e_resources { struct mlx5_td td; u32 mkey; struct mlx5_sq_bfreg bfreg; +#define MLX5_MAX_NUM_TC 8 + u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; + bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; @@ -682,6 +692,7 @@ enum mlx5_sw_icm_type { MLX5_SW_ICM_TYPE_STEERING, MLX5_SW_ICM_TYPE_HEADER_MODIFY, MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN, + MLX5_SW_ICM_TYPE_SW_ENCAP, }; #define MLX5_MAX_RESERVED_GIDS 8 @@ -725,7 +736,6 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; -struct mlx5_thermal; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -804,7 +814,16 @@ struct mlx5_core_dev { struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; - struct mlx5_thermal *thermal; + struct mlx5_hwmon *hwmon; + u64 num_block_tc; + u64 num_block_ipsec; +#ifdef CONFIG_MLX5_MACSEC + struct mlx5_macsec_fs *macsec_fs; + /* MACsec notifier chain to sync MACsec core and IB database */ + struct blocking_notifier_head macsec_nh; +#endif + u64 num_ipsec_offloads; + struct mlx5_sd *sd; }; struct mlx5_db { @@ -1018,7 +1037,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); @@ -1029,10 +1049,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, - gfp_t flags, int npages); -void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, - struct mlx5_cmd_mailbox *head); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); @@ -1046,8 +1062,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1055,7 +1069,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); +int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); @@ -1087,8 +1101,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); __be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, - struct mlx5_odp_caps *odp_caps); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1105,9 +1117,8 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); -unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); -struct cpumask * -mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector); +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev); +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, @@ -1191,12 +1202,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, int vf_id, struct notifier_block *nb); -#ifdef CONFIG_MLX5_CORE_IPOIB -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)); -#endif /* CONFIG_MLX5_CORE_IPOIB */ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); @@ -1320,6 +1325,52 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) return mlx5_is_roce_on(dev); } +#ifdef CONFIG_MLX5_MACSEC +static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev) +{ + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec)) + return false; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt)) + return false; + + if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) && + !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt)) + return false; + + return true; +} + +#define NIC_RDMA_BOTH_DIRS_CAPS (MLX5_FT_NIC_RX_2_NIC_RX_RDMA | MLX5_FT_NIC_TX_RDMA_2_NIC_TX) + +static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) +{ + if (((MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & + NIC_RDMA_BOTH_DIRS_CAPS) != NIC_RDMA_BOTH_DIRS_CAPS) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, max_modify_header_actions) || + !mlx5e_is_macsec_device(mdev) || !mdev->macsec_fs) + return false; + + return true; +} +#endif + enum { MLX5_OCTWORD = 16, }; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index e2701ed0200e..df73a2ccc9af 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <net/devlink.h> #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -144,6 +145,9 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ ESW_TUN_OPTS_OFFSET + 1) +/* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */ +#define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0) + u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); @@ -207,4 +211,11 @@ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; } +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #endif diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 2cb404c7ea13..3fb428ce7d1c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 @@ -105,15 +106,19 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, + MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC, + MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, }; enum { FDB_BYPASS_PATH, + FDB_CRYPTO_INGRESS, FDB_TC_OFFLOAD, FDB_FT_OFFLOAD, FDB_TC_MISS, FDB_BR_OFFLOAD, FDB_SLOW_PATH, + FDB_CRYPTO_EGRESS, FDB_PER_VPORT, }; @@ -127,6 +132,7 @@ struct mlx5_flow_handle; enum { FLOW_CONTEXT_HAS_TAG = BIT(0), + FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1), }; struct mlx5_flow_context { diff --git a/include/linux/mlx5/macsec.h b/include/linux/mlx5/macsec.h new file mode 100644 index 000000000000..f7ff4c2a95d0 --- /dev/null +++ b/include/linux/mlx5/macsec.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef MLX5_MACSEC_H +#define MLX5_MACSEC_H + +#ifdef CONFIG_MLX5_MACSEC +struct mlx5_macsec_event_data { + struct mlx5_macsec_fs *macsec_fs; + void *macdev; + u32 fs_id; + bool is_tx; +}; + +int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs); + +void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, struct list_head *rx_rules_list); + +void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs, bool is_tx); + +void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, bool is_tx); + +#endif +#endif /* MLX5_MACSEC_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 33344a71c3e3..c940b329a475 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -65,9 +65,11 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS = 0x1, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_IPSEC = 0x15, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION = 0x25, }; @@ -310,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -432,7 +435,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 flow_table_modify[0x1]; u8 reformat[0x1]; u8 decap[0x1]; - u8 reserved_at_9[0x1]; + u8 reset_root_to_default[0x1]; u8 pop_vlan[0x1]; u8 push_vlan[0x1]; u8 reserved_at_c[0x1]; @@ -464,10 +467,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_add_esp_trasport[0x1]; u8 reformat_l2_to_l3_esp_tunnel[0x1]; - u8 reserved_at_42[0x1]; + u8 reformat_add_esp_transport_over_udp[0x1]; u8 reformat_del_esp_trasport[0x1]; u8 reformat_l3_esp_tunnel_to_l2[0x1]; - u8 reserved_at_45[0x1]; + u8 reformat_del_esp_transport_over_udp[0x1]; u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; @@ -618,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; @@ -1100,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; @@ -1190,7 +1193,8 @@ struct mlx5_ifc_device_mem_cap_bits { u8 log_sw_icm_alloc_granularity[0x6]; u8 log_steering_sw_icm_size[0x8]; - u8 reserved_at_120[0x18]; + u8 log_indirect_encap_sw_icm_size[0x8]; + u8 reserved_at_128[0x10]; u8 log_header_modify_pattern_sw_icm_size[0x8]; u8 header_modify_sw_icm_start_address[0x40]; @@ -1201,7 +1205,11 @@ struct mlx5_ifc_device_mem_cap_bits { u8 memic_operations[0x20]; - u8 reserved_at_220[0x5e0]; + u8 reserved_at_220[0x20]; + + u8 indirect_encap_sw_icm_start_address[0x40]; + + u8 reserved_at_280[0x580]; }; struct mlx5_ifc_device_event_cap_bits { @@ -1229,7 +1237,14 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 freeze_to_rdy_supported[0x1]; + u8 reserved_at_d5[0xb]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; @@ -1314,33 +1329,6 @@ struct mlx5_ifc_odp_cap_bits { u8 reserved_at_120[0x6E0]; }; -struct mlx5_ifc_calc_op { - u8 reserved_at_0[0x10]; - u8 reserved_at_10[0x9]; - u8 op_swap_endianness[0x1]; - u8 op_min[0x1]; - u8 op_xor[0x1]; - u8 op_or[0x1]; - u8 op_and[0x1]; - u8 op_max[0x1]; - u8 op_add[0x1]; -}; - -struct mlx5_ifc_vector_calc_cap_bits { - u8 calc_matrix[0x1]; - u8 reserved_at_1[0x1f]; - u8 reserved_at_20[0x8]; - u8 max_vec_count[0x8]; - u8 reserved_at_30[0xd]; - u8 max_chunk_size[0x3]; - struct mlx5_ifc_calc_op calc0; - struct mlx5_ifc_calc_op calc1; - struct mlx5_ifc_calc_op calc2; - struct mlx5_ifc_calc_op calc3; - - u8 reserved_at_c0[0x720]; -}; - struct mlx5_ifc_tls_cap_bits { u8 tls_1_2_aes_gcm_128[0x1]; u8 tls_1_3_aes_gcm_128[0x1]; @@ -1819,7 +1807,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_uc[0x1]; u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3e8[0x2]; + u8 reserved_at_3e8[0x1]; + u8 silent_mode[0x1]; u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; @@ -1836,7 +1825,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_460[0x1]; u8 ats[0x1]; - u8 reserved_at_462[0x1]; + u8 cross_vhca_rqt[0x1]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x1]; u8 crypto[0x1]; @@ -1959,6 +1948,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE = (1ULL << 20), +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1973,9 +1971,15 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; + + u8 cross_vhca_object_to_object_supported[0x20]; - u8 reserved_at_e0[0xc0]; + u8 allowed_object_for_other_vhca_access[0x40]; + + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -1996,7 +2000,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_260[0x120]; u8 reserved_at_380[0x10]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0x460]; + + u8 reserved_at_3a0[0x10]; + u8 max_rqt_vhca_id[0x10]; + + u8 reserved_at_3c0[0x440]; }; enum mlx5_ifc_flow_destination_type { @@ -2155,6 +2163,13 @@ struct mlx5_ifc_rq_num_bits { u8 rq_num[0x18]; }; +struct mlx5_ifc_rq_vhca_bits { + u8 reserved_at_0[0x8]; + u8 rq_num[0x18]; + u8 reserved_at_20[0x10]; + u8 rq_vhca_id[0x10]; +}; + struct mlx5_ifc_mac_address_layout_bits { u8 reserved_at_0[0x10]; u8 mac_addr_47_32[0x10]; @@ -3435,20 +3450,6 @@ struct mlx5_ifc_roce_addr_layout_bits { u8 reserved_at_e0[0x20]; }; -struct mlx5_ifc_shampo_cap_bits { - u8 reserved_at_0[0x3]; - u8 shampo_log_max_reservation_size[0x5]; - u8 reserved_at_8[0x3]; - u8 shampo_log_min_reservation_size[0x5]; - u8 shampo_min_mss_size[0x10]; - - u8 reserved_at_20[0x3]; - u8 shampo_max_log_headers_entry_size[0x5]; - u8 reserved_at_28[0x18]; - - u8 reserved_at_40[0x7c0]; -}; - struct mlx5_ifc_crypto_cap_bits { u8 reserved_at_0[0x3]; u8 synchronize_dek[0x1]; @@ -3484,16 +3485,15 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_port_selection_cap_bits port_selection_cap; - struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; - struct mlx5_ifc_shampo_cap_bits shampo_cap; struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; + struct mlx5_ifc_ipsec_cap_bits ipsec_cap; u8 reserved_at_0[0x8000]; }; @@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_81[0x1]; + u8 uplink_hairpin_en[0x1]; u8 flow_source[0x2]; u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; @@ -3920,7 +3920,10 @@ struct mlx5_ifc_rqtc_bits { u8 reserved_at_e0[0x6a0]; - struct mlx5_ifc_rq_num_bits rq_num[]; + union { + DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_num_bits, rq_num); + DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_vhca_bits, rq_vhca); + }; }; enum { @@ -4033,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits { u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; - u8 reserved_at_60[0xd0]; + u8 reserved_at_60[0xa0]; + + u8 reserved_at_100[0x1]; + u8 sd_group[0x3]; + u8 reserved_at_104[0x1c]; + u8 reserved_at_120[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; @@ -4763,7 +4771,10 @@ struct mlx5_ifc_set_l2_table_entry_in_bits { u8 reserved_at_c0[0x20]; - u8 reserved_at_e0[0x13]; + u8 reserved_at_e0[0x10]; + u8 silent_mode_valid[0x1]; + u8 silent_mode[0x1]; + u8 reserved_at_f2[0x1]; u8 vlan_valid[0x1]; u8 vlan[0xc]; @@ -6409,6 +6420,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6431,6 +6464,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -6665,9 +6716,12 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5, MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4 = 0x7, MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8, MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9, + MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -10065,6 +10119,19 @@ struct mlx5_ifc_mpegc_reg_bits { u8 reserved_at_60[0x100]; }; +struct mlx5_ifc_mpir_reg_bits { + u8 sdm[0x1]; + u8 reserved_at_1[0x1b]; + u8 host_buses[0x4]; + + u8 reserved_at_20[0x20]; + + u8 local_port[0x8]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_60[0x20]; +}; + enum { MLX5_MTUTC_FREQ_ADJ_UNITS_PPB = 0x0, MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM = 0x1, @@ -10079,7 +10146,10 @@ enum { struct mlx5_ifc_mtutc_reg_bits { u8 reserved_at_0[0x5]; u8 freq_adj_units[0x3]; - u8 reserved_at_8[0x14]; + u8 reserved_at_8[0x3]; + u8 log_max_freq_adjustment[0x5]; + + u8 reserved_at_10[0xc]; u8 operation[0x4]; u8 freq_adjustment[0x20]; @@ -10183,7 +10253,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 mcqi[0x1]; u8 mcqs[0x1]; - u8 regs_95_to_87[0x9]; + u8 regs_95_to_90[0x6]; + u8 mpir[0x1]; + u8 regs_88_to_87[0x2]; u8 mpegc[0x1]; u8 mtutc[0x1]; u8 regs_84_to_68[0x11]; @@ -10191,9 +10263,13 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; - u8 regs_31_to_0[0x20]; + u8 regs_31_to_10[0x16]; + u8 mtmp[0x1]; + u8 regs_8_to_0[0x9]; }; struct mlx5_ifc_mcam_access_reg_bits1 { @@ -10211,7 +10287,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -10587,6 +10665,7 @@ enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, + MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7, }; enum { @@ -10607,6 +10686,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, }; struct mlx5_ifc_initial_seg_bits { @@ -10853,8 +10933,9 @@ enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, - MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3, + MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT = 3, MLX5_MFRL_REG_RESET_STATE_NACK = 4, + MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT = 5, }; enum { @@ -10946,6 +11027,15 @@ struct mlx5_ifc_mrtc_reg_bits { u8 time_l[0x20]; }; +struct mlx5_ifc_mtcap_reg_bits { + u8 reserved_at_0[0x19]; + u8 sensor_count[0x7]; + + u8 reserved_at_20[0x20]; + + u8 sensor_map[0x40]; +}; + struct mlx5_ifc_mtmp_reg_bits { u8 reserved_at_0[0x14]; u8 sensor_index[0xc]; @@ -11033,6 +11123,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mfrl_reg_bits mfrl_reg; struct mlx5_ifc_mtutc_reg_bits mtutc_reg; struct mlx5_ifc_mrtc_reg_bits mrtc_reg; + struct mlx5_ifc_mtcap_reg_bits mtcap_reg; struct mlx5_ifc_mtmp_reg_bits mtmp_reg; u8 reserved_at_0[0x60e0]; }; @@ -11941,6 +12032,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { @@ -11960,6 +12052,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; @@ -12416,7 +12515,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12432,7 +12532,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12464,7 +12568,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { @@ -12573,4 +12677,72 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_query_page_track_obj_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_page_track_bits obj_context; +}; + +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +enum mlx5_msees_failure_reason { + MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0, + MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1, + MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2, + MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3, + MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 9becdc3fa503..40371c916cf9 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits { u8 reserved_at_320[0x8]; u8 pd[0x18]; - u8 reserved_at_340[0xc0]; + u8 reserved_at_340[0x20]; + + u8 desc_group_mkey[0x20]; + + u8 reserved_at_380[0x80]; }; struct mlx5_ifc_virtio_net_q_object_bits { @@ -141,6 +145,11 @@ enum { MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, + MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, + MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; enum { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 98b2e1e149f9..26092c78a985 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,8 +115,9 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, - MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index fbb9bf447889..c36cc6d82926 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, u16 vport, u64 node_guid); diff --git a/include/linux/mm.h b/include/linux/mm.h index 34f9dba17c1a..b6bdaa18b9e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,6 +36,7 @@ struct anon_vma; struct anon_vma_chain; struct user_struct; struct pt_regs; +struct folio_batch; extern int sysctl_page_lock_unfairness; @@ -86,7 +87,7 @@ extern int sysctl_legacy_va_layout; #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS extern const int mmap_rnd_bits_min; -extern const int mmap_rnd_bits_max; +extern int mmap_rnd_bits_max __ro_after_init; extern int mmap_rnd_bits __read_mostly; #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS @@ -226,7 +227,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); @@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) +#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -339,14 +341,27 @@ extern unsigned int kobjsize(const void *objp); #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ +#ifdef CONFIG_X86_USER_SHADOW_STACK +/* + * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of + * support core mm. + * + * These VMAs will get a single end guard page. This helps userspace protect + * itself from attacks. A single page is enough for current shadow stack archs + * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c + * for more details on the guard size. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_5 +#else +# define VM_SHADOW_STACK VM_NONE +#endif + #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ #elif defined(CONFIG_PPC) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_IA64) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI @@ -370,12 +385,26 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 37 +# define VM_UFFD_MINOR_BIT 38 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +/* + * This flag is used to connect VFIO to arch specific KVM code. It + * indicates that the memory under this VMA is safe for use with any + * non-cachable memory type inside KVM. Some VFIO devices, on some + * platforms, are thought to be unsafe and can cause machine crashes + * if KVM does not lock down the memory type. + */ +#ifdef CONFIG_64BIT +#define VM_ALLOW_ANY_UNCACHED_BIT 39 +#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT) +#else +#define VM_ALLOW_ANY_UNCACHED VM_NONE +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) @@ -397,6 +426,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif +#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) + #ifdef CONFIG_STACK_GROWSUP #define VM_STACK VM_GROWSUP #define VM_STACK_EARLY VM_GROWSDOWN @@ -532,13 +563,6 @@ struct vm_fault { */ }; -/* page entry size for vm->huge_fault() */ -enum page_entry_size { - PE_SIZE_PTE = 0, - PE_SIZE_PMD, - PE_SIZE_PUD, -}; - /* * These are the virtual MM functions - opening of an area, closing and * unmapping it (needed to keep files on disk up-to-date etc), pointer @@ -562,8 +586,7 @@ struct vm_operations_struct { int (*mprotect)(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); - vm_fault_t (*huge_fault)(struct vm_fault *vmf, - enum page_entry_size pe_size); + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); vm_fault_t (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); unsigned long (*pagesize)(struct vm_area_struct * area); @@ -608,7 +631,7 @@ struct vm_operations_struct { * policy. */ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, pgoff_t *ilx); #endif /* * Called by vm_normal_page() for special PTEs to find the @@ -679,6 +702,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) rcu_read_unlock(); } +/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -691,6 +715,11 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) return (vma->vm_lock_seq == *mm_lock_seq); } +/* + * Begin writing to a VMA. + * Exclude concurrent readers under the per-VMA lock until the currently + * write-locked mmap_lock is dropped or downgraded. + */ static inline void vma_start_write(struct vm_area_struct *vma) { int mm_lock_seq; @@ -709,26 +738,17 @@ static inline void vma_start_write(struct vm_area_struct *vma) up_write(&vma->vm_lock->lock); } -static inline bool vma_try_start_write(struct vm_area_struct *vma) +static inline void vma_assert_write_locked(struct vm_area_struct *vma) { int mm_lock_seq; - if (__is_vma_write_locked(vma, &mm_lock_seq)) - return true; - - if (!down_write_trylock(&vma->vm_lock->lock)) - return false; - - WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock->lock); - return true; + VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } -static inline void vma_assert_write_locked(struct vm_area_struct *vma) +static inline void vma_assert_locked(struct vm_area_struct *vma) { - int mm_lock_seq; - - VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); + if (!rwsem_is_locked(&vma->vm_lock->lock)) + vma_assert_write_locked(vma); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) @@ -739,6 +759,22 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) vma->detached = detached; } +static inline void release_fault_lock(struct vm_fault *vmf) +{ + if (vmf->flags & FAULT_FLAG_VMA_LOCK) + vma_end_read(vmf->vma); + else + mmap_read_unlock(vmf->vma->vm_mm); +} + +static inline void assert_fault_locked(struct vm_fault *vmf) +{ + if (vmf->flags & FAULT_FLAG_VMA_LOCK) + vma_assert_locked(vmf->vma); + else + mmap_assert_locked(vmf->vma->vm_mm); +} + struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address); @@ -748,25 +784,45 @@ static inline bool vma_start_read(struct vm_area_struct *vma) { return false; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} -static inline bool vma_try_start_write(struct vm_area_struct *vma) - { return true; } -static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) + { mmap_assert_write_locked(vma->vm_mm); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) {} +static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + return NULL; +} + +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); +} + +static inline void release_fault_lock(struct vm_fault *vmf) +{ + mmap_read_unlock(vmf->vma->vm_mm); +} + +static inline void assert_fault_locked(struct vm_fault *vmf) +{ + mmap_assert_locked(vmf->vma->vm_mm); +} + #endif /* CONFIG_PER_VMA_LOCK */ +extern const struct vm_operations_struct vma_dummy_vm_ops; + /* * WARNING: vma_init does not initialize vma->vm_lock. * Use vm_area_alloc()/vm_area_free() if vma needs locking. */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { - static const struct vm_operations_struct dummy_vm_ops = {}; - memset(vma, 0, sizeof(*vma)); vma->vm_mm = mm; - vma->vm_ops = &dummy_vm_ops; + vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); vma_numab_state_init(vma); @@ -779,18 +835,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma, ACCESS_PRIVATE(vma, __vm_flags) = flags; } -/* Use when VMA is part of the VMA tree and modifications need coordination */ +/* + * Use when VMA is part of the VMA tree and modifications need coordination + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and + * it should be locked explicitly beforehand. + */ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { - vma_start_write(vma); + vma_assert_write_locked(vma); vm_flags_init(vma, flags); } static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { - vma_start_write(vma); + vma_assert_write_locked(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } @@ -839,6 +899,31 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +/* + * Indicate if the VMA is a heap for the given task; for + * /proc/PID/maps that is the heap of the main task. + */ +static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) +{ + return vma->vm_start < vma->vm_mm->brk && + vma->vm_end > vma->vm_mm->start_brk; +} + +/* + * Indicate if the VMA is a stack for the given task; for + * /proc/PID/maps that is the stack of the main task. + */ +static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) +{ + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; +} + static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -869,6 +954,17 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } +static inline bool is_shared_maywrite(vm_flags_t vm_flags) +{ + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == + (VM_SHARED | VM_MAYWRITE); +} + +static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +{ + return is_shared_maywrite(vma->vm_flags); +} + static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { @@ -917,6 +1013,17 @@ static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, return mas_expected_entries(&vmi->mas, count); } +static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, + unsigned long start, unsigned long end, gfp_t gfp) +{ + __mas_set_range(&vmi->mas, start, end - 1); + mas_store_gfp(&vmi->mas, NULL, gfp); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + /* Free any unused preallocations */ static inline void vma_iter_free(struct vma_iterator *vmi) { @@ -976,7 +1083,7 @@ struct inode; * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be * prepared to handle wild return values. For example, PG_head may be - * set before _folio_order is initialised, or this may be a tail page. + * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ static inline unsigned int compound_order(struct page *page) @@ -985,7 +1092,7 @@ static inline unsigned int compound_order(struct page *page) if (!test_bit(PG_head, &folio->flags)) return 0; - return folio->_folio_order; + return folio->_flags_1 & 0xff; } /** @@ -1001,7 +1108,7 @@ static inline unsigned int folio_order(struct folio *folio) { if (!folio_test_large(folio)) return 0; - return folio->_folio_order; + return folio->_flags_1 & 0xff; } #include <linux/huge_mm.h> @@ -1072,11 +1179,6 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ - -#ifndef is_ioremap_addr -#define is_ioremap_addr(x) is_vmalloc_addr(x) -#endif - #ifdef CONFIG_MMU extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); @@ -1095,7 +1197,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for * debugging purposes - it does not include PTE-mapped sub-pages; look - * at folio_mapcount() or page_mapcount() or total_mapcount() instead. + * at folio_mapcount() or page_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { @@ -1121,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); @@ -1155,13 +1259,6 @@ static inline int folio_mapcount(struct folio *folio) return folio_total_mapcount(folio); } -static inline int total_mapcount(struct page *page) -{ - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) + 1; - return folio_total_mapcount(page_folio(page)); -} - static inline bool folio_large_is_mapped(struct folio *folio) { /* @@ -1220,33 +1317,6 @@ void folio_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); -/* - * Compound pages have a destructor function. Provide a - * prototype for that function and accessor functions. - * These are _only_ valid on the head of a compound page. - */ -typedef void compound_page_dtor(struct page *); - -/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ -enum compound_dtor_id { - NULL_COMPOUND_DTOR, - COMPOUND_PAGE_DTOR, -#ifdef CONFIG_HUGETLB_PAGE - HUGETLB_PAGE_DTOR, -#endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - TRANSHUGE_PAGE_DTOR, -#endif - NR_COMPOUND_DTORS, -}; - -static inline void folio_set_compound_dtor(struct folio *folio, - enum compound_dtor_id compound_dtor) -{ - VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio); - folio->_folio_dtor = compound_dtor; -} - void destroy_large_folio(struct folio *folio); /* Returns the number of bytes in this potentially compound page. */ @@ -1282,8 +1352,6 @@ static inline unsigned long thp_size(struct page *page) return PAGE_SIZE << thp_order(page); } -void free_compound_page(struct page *page); - #ifdef CONFIG_MMU /* * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when @@ -1294,15 +1362,15 @@ void free_compound_page(struct page *page); static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, vma); return pte; } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); -void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr); +void set_pte_range(struct vm_fault *vmf, struct folio *folio, + struct page *page, unsigned int nr, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); -vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif /* @@ -1460,6 +1528,8 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } +void folios_put_refs(struct folio_batch *folios, unsigned int *refs); + /* * union release_pages_arg - an array of pages or folios * @@ -1482,18 +1552,19 @@ void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. * @folios: The folios. - * @nr: How many folios there are. * - * Like folio_put(), but for an array of folios. This is more efficient - * than writing the loop yourself as it will optimise the locks which - * need to be taken if the folios are freed. + * Like folio_put(), but for a batch of folios. This is more efficient + * than writing the loop yourself as it will optimise the locks which need + * to be taken if the folios are freed. The folios batch is returned + * empty and ready to be reused for another batch; there is no need to + * reinitialise it. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ -static inline void folios_put(struct folio **folios, unsigned int nr) +static inline void folios_put(struct folio_batch *folios) { - release_pages(folios, nr); + folios_put_refs(folios, NULL); } static inline void put_page(struct page *page) @@ -1586,13 +1657,11 @@ static inline int page_zone_id(struct page *page) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern int page_to_nid(const struct page *page); +int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - struct page *p = (struct page *)page; - - return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif @@ -1651,26 +1720,26 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); + return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page->_last_cpupid; + return folio->_last_cpupid; } static inline void page_cpupid_reset_last(struct page *page) { page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -extern int page_cpupid_xchg_last(struct page *page, int cpupid); +int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { @@ -1678,11 +1747,12 @@ static inline void page_cpupid_reset_last(struct page *page) } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { int last_time; - last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + last_time = folio_xchg_last_cpupid(folio, + time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } @@ -1691,24 +1761,24 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ -static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } -static inline int xchg_page_access_time(struct page *page, int time) +static inline int folio_xchg_access_time(struct folio *folio, int time) { return 0; } -static inline int page_cpupid_last(struct page *page) +static inline int folio_last_cpupid(struct folio *folio) { - return page_to_nid(page); /* XXX */ + return folio_nid(folio); /* XXX */ } static inline int cpupid_to_nid(int cpupid) @@ -1760,7 +1830,7 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) static inline u8 page_kasan_tag(const struct page *page) { - u8 tag = 0xff; + u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; @@ -1789,7 +1859,7 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag) static inline void page_kasan_tag_reset(struct page *page) { if (kasan_enabled()) - page_kasan_tag_set(page, 0xff); + page_kasan_tag_set(page, KASAN_TAG_KERNEL); } #else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ @@ -1909,15 +1979,15 @@ static inline bool page_maybe_dma_pinned(struct page *page) * * The caller has to hold the PT lock and the vma->vm_mm->->write_protect_seq. */ -static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, - struct page *page) +static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, + struct folio *folio) { VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) return false; - return page_maybe_dma_pinned(page); + return folio_maybe_dma_pinned(folio); } /** @@ -2006,10 +2076,17 @@ static inline long folio_nr_pages(struct folio *folio) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << folio->_folio_order; + return 1L << (folio->_flags_1 & 0xff); #endif } +/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#else +#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +#endif + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to @@ -2024,7 +2101,7 @@ static inline unsigned long compound_nr(struct page *page) #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else - return 1L << folio->_folio_order; + return 1L << (folio->_flags_1 & 0xff); #endif } @@ -2132,11 +2209,6 @@ static inline int arch_make_folio_accessible(struct folio *folio) */ #include <linux/vmstat.h> -static __always_inline void *lowmem_page_address(const struct page *page) -{ - return page_to_virt(page); -} - #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL #endif @@ -2159,6 +2231,11 @@ void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif +static __always_inline void *lowmem_page_address(const struct page *page) +{ + return page_to_virt(page); +} + #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) #define set_page_address(page, address) do { } while(0) @@ -2170,7 +2247,6 @@ static inline void *folio_address(const struct folio *folio) return page_address(&folio->page); } -extern void *page_rmapping(struct page *page); extern pgoff_t __page_file_index(struct page *page); /* @@ -2238,18 +2314,6 @@ extern void pagefault_out_of_memory(void); #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* - * Flags passed to show_mem() and show_free_areas() to suppress output in - * various contexts. - */ -#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ - -extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask) -{ - __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); -} - -/* * Parameter block passed down to zap_pte_range in exceptional cases. */ struct zap_details { @@ -2305,6 +2369,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); @@ -2317,9 +2383,9 @@ static inline void zap_vma_pages(struct vm_area_struct *vma) zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); } -void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, +void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long start, - unsigned long end, bool mm_wr_locked); + unsigned long end, unsigned long tree_end, bool mm_wr_locked); struct mmu_notifier_range; @@ -2340,7 +2406,8 @@ extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); -int generic_error_remove_page(struct address_space *mapping, struct page *page); +int generic_error_remove_folio(struct address_space *mapping, + struct folio *folio); struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long address, struct pt_regs *regs); @@ -2391,8 +2458,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -2403,6 +2468,9 @@ long pin_user_pages_remote(struct mm_struct *mm, unsigned int gup_flags, struct page **pages, int *locked); +/* + * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT. + */ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, unsigned long addr, int gup_flags, @@ -2410,12 +2478,15 @@ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, { struct page *page; struct vm_area_struct *vma; - int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); + int got; + + if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT))) + return ERR_PTR(-EINVAL); + + got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); if (got < 0) return ERR_PTR(got); - if (got == 0) - return NULL; vma = vma_lookup(mm, addr); if (WARN_ON_ONCE(!vma)) { @@ -2458,7 +2529,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks); + bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so @@ -2546,19 +2617,19 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member) mm_trace_rss_stat(mm, member); } -/* Optimized variant when page is already known not to be PageAnon */ -static inline int mm_counter_file(struct page *page) +/* Optimized variant when folio is already known not to be anon */ +static inline int mm_counter_file(struct folio *folio) { - if (PageSwapBacked(page)) + if (folio_test_swapbacked(folio)) return MM_SHMEMPAGES; return MM_FILEPAGES; } -static inline int mm_counter(struct page *page) +static inline int mm_counter(struct folio *folio) { - if (PageAnon(page)) + if (folio_test_anon(folio)) return MM_ANONPAGES; - return mm_counter_file(page); + return mm_counter_file(folio); } static inline unsigned long get_mm_rss(struct mm_struct *mm) @@ -2606,14 +2677,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } -#if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct mm_struct *mm); -#else -static inline void sync_mm_rss(struct mm_struct *mm) -{ -} -#endif - #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int pte_special(pte_t pte) { @@ -2766,42 +2829,93 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +static inline struct ptdesc *virt_to_ptdesc(const void *x) +{ + return page_ptdesc(virt_to_page(x)); +} + +static inline void *ptdesc_to_virt(const struct ptdesc *pt) +{ + return page_to_virt(ptdesc_page(pt)); +} + +static inline void *ptdesc_address(const struct ptdesc *pt) +{ + return folio_address(ptdesc_folio(pt)); +} + +static inline bool pagetable_is_reserved(struct ptdesc *pt) +{ + return folio_test_reserved(ptdesc_folio(pt)); +} + +/** + * pagetable_alloc - Allocate pagetables + * @gfp: GFP flags + * @order: desired pagetable order + * + * pagetable_alloc allocates memory for page tables as well as a page table + * descriptor to describe that memory. + * + * Return: The ptdesc describing the allocated page tables. + */ +static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + return page_ptdesc(page); +} + +/** + * pagetable_free - Free pagetables + * @pt: The page table descriptor + * + * pagetable_free frees the memory of all page tables described by a page + * table descriptor and the memory for the descriptor itself. + */ +static inline void pagetable_free(struct ptdesc *pt) +{ + struct page *page = ptdesc_page(pt); + + __free_pages(page, compound_order(page)); +} + #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); -extern bool ptlock_alloc(struct page *page); -extern void ptlock_free(struct page *page); +bool ptlock_alloc(struct ptdesc *ptdesc); +void ptlock_free(struct ptdesc *ptdesc); -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return page->ptl; + return ptdesc->ptl; } #else /* ALLOC_SPLIT_PTLOCKS */ static inline void ptlock_cache_init(void) { } -static inline bool ptlock_alloc(struct page *page) +static inline bool ptlock_alloc(struct ptdesc *ptdesc) { return true; } -static inline void ptlock_free(struct page *page) +static inline void ptlock_free(struct ptdesc *ptdesc) { } -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return &page->ptl; + return &ptdesc->ptl; } #endif /* ALLOC_SPLIT_PTLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_page(*pmd)); + return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } -static inline bool ptlock_init(struct page *page) +static inline bool ptlock_init(struct ptdesc *ptdesc) { /* * prep_new_page() initialize page->private (and therefore page->ptl) @@ -2810,10 +2924,10 @@ static inline bool ptlock_init(struct page *page) * It can happen if arch try to use slab for page table allocation: * slab code uses page->slab_cache, which share storage with page->ptl. */ - VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); - if (!ptlock_alloc(page)) + VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); + if (!ptlock_alloc(ptdesc)) return false; - spin_lock_init(ptlock_ptr(page)); + spin_lock_init(ptlock_ptr(ptdesc)); return true; } @@ -2826,24 +2940,28 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } static inline void ptlock_cache_init(void) {} -static inline bool ptlock_init(struct page *page) { return true; } -static inline void ptlock_free(struct page *page) {} +static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } +static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline bool pgtable_pte_page_ctor(struct page *page) +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { - if (!ptlock_init(page)) + struct folio *folio = ptdesc_folio(ptdesc); + + if (!ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } -static inline void pgtable_pte_page_dtor(struct page *page) +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) { - ptlock_free(page); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + struct folio *folio = ptdesc_folio(ptdesc); + + ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); @@ -2892,28 +3010,33 @@ static inline struct page *pmd_pgtable_page(pmd_t *pmd) return virt_to_page((void *)((unsigned long) pmd & mask)); } +static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) +{ + return page_ptdesc(pmd_pgtable_page(pmd)); +} + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_pgtable_page(pmd)); + return ptlock_ptr(pmd_ptdesc(pmd)); } -static inline bool pmd_ptlock_init(struct page *page) +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - page->pmd_huge_pte = NULL; + ptdesc->pmd_huge_pte = NULL; #endif - return ptlock_init(page); + return ptlock_init(ptdesc); } -static inline void pmd_ptlock_free(struct page *page) +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON_PAGE(page->pmd_huge_pte, page); + VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); #endif - ptlock_free(page); + ptlock_free(ptdesc); } -#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else @@ -2922,8 +3045,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } -static inline bool pmd_ptlock_init(struct page *page) { return true; } -static inline void pmd_ptlock_free(struct page *page) {} +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -2936,20 +3059,24 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -static inline bool pgtable_pmd_page_ctor(struct page *page) +static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) { - if (!pmd_ptlock_init(page)) + struct folio *folio = ptdesc_folio(ptdesc); + + if (!pmd_ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } -static inline void pgtable_pmd_page_dtor(struct page *page) +static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) { - pmd_ptlock_free(page); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + struct folio *folio = ptdesc_folio(ptdesc); + + pmd_ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } /* @@ -2971,6 +3098,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) return ptl; } +static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); +} + +static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + extern void __init pagecache_init(void); extern void free_initmem(void); @@ -3004,6 +3147,11 @@ static inline void mark_page_reserved(struct page *page) adjust_managed_page_count(page, -1); } +static inline void free_reserved_ptdesc(struct ptdesc *pt) +{ + free_reserved_page(ptdesc_page(pt)); +} + /* * Default method to free all the __init memory into the buddy system. * The freed pages will be poisoned with pattern "poison" if it's within @@ -3069,9 +3217,9 @@ extern void mem_init(void); extern void __init mmap_init(void); extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static inline void show_mem(unsigned int flags, nodemask_t *nodemask) +static inline void show_mem(void) { - __show_mem(flags, nodemask, MAX_NR_ZONES - 1); + __show_mem(0, NULL, MAX_NR_ZONES - 1); } extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); @@ -3130,22 +3278,73 @@ extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct *next); extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff); -extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, - struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, - unsigned long end, unsigned long vm_flags, struct anon_vma *, - struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, - struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *, - unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +struct vm_area_struct *vma_modify(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long vm_flags, + struct mempolicy *policy, + struct vm_userfaultfd_ctx uffd_ctx, + struct anon_vma_name *anon_name); + +/* We are about to modify the VMA's flags. */ +static inline struct vm_area_struct +*vma_modify_flags(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, + anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or anon_name. */ +static inline struct vm_area_struct +*vma_modify_flags_name(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long new_flags, + struct anon_vma_name *new_name) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); +} + +/* We are about to modify the VMA's memory policy. */ +static inline struct vm_area_struct +*vma_modify_policy(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct mempolicy *new_pol) +{ + return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, + new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); +} + +/* We are about to modify the VMA's flags and/or uffd context. */ +static inline struct vm_area_struct +*vma_modify_flags_uffd(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long new_flags, + struct vm_userfaultfd_ctx new_ctx) +{ + return vma_modify(vmi, prev, vma, start, end, new_flags, + vma_policy(vma), new_ctx, anon_vma_name(vma)); +} static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3193,7 +3392,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate, struct list_head *uf); + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, + struct list_head *uf); extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); @@ -3216,8 +3416,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif -/* These take the mm semaphore themselves */ -extern int __must_check vm_brk(unsigned long, unsigned long); +/* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, @@ -3281,15 +3480,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return mtree_load(&mm->mm_mt, addr); } +static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_GROWSDOWN) + return stack_guard_gap; + + /* See reasoning around the VM_SHADOW_STACK definition */ + if (vma->vm_flags & VM_SHADOW_STACK) + return PAGE_SIZE; + + return 0; +} + static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { + unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) { - vm_start -= stack_guard_gap; - if (vm_start > vma->vm_start) - vm_start = 0; - } + vm_start -= gap; + if (vm_start > vma->vm_start) + vm_start = 0; return vm_start; } @@ -3403,6 +3613,24 @@ static inline vm_fault_t vmf_error(int err) return VM_FAULT_SIGBUS; } +/* + * Convert errno to return value for ->page_mkwrite() calls. + * + * This should eventually be merged with vmf_error() above, but will need a + * careful audit of all vmf_error() callers. + */ +static inline vm_fault_t vmf_fs_error(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT || err == -EAGAIN) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} + struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); @@ -3509,8 +3737,8 @@ static inline bool debug_pagealloc_enabled(void) } /* - * For use in fast paths after init_debug_pagealloc() has run, or when a - * false negative result is not harmful when called too early. + * For use in fast paths after mem_debugging_and_hardening_init() has run, + * or when a false negative result is not harmful when called too early. */ static inline bool debug_pagealloc_enabled_static(void) { @@ -3665,13 +3893,58 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif -#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP -static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, - struct dev_pagemap *pgmap) +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { - return is_power_of_2(sizeof(struct page)) && - pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap; + /* number of pfns from base where pfn_to_page() is valid */ + if (altmap) + return altmap->reserve + altmap->free; + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ + altmap->alloc -= nr_pfns; +} +#else +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +{ + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ +} +#endif + +#define VMEMMAP_RESERVE_NR 2 +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP +static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) +{ + unsigned long nr_pages; + unsigned long nr_vmemmap_pages; + + if (!pgmap || !is_power_of_2(sizeof(struct page))) + return false; + + nr_pages = pgmap_vmemmap_nr(pgmap); + nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); + /* + * For vmemmap optimization with DAX we need minimum 2 vmemmap + * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst + */ + return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); } +/* + * If we don't have an architecture override, use the generic rule + */ +#ifndef vmemmap_can_optimize +#define vmemmap_can_optimize __vmemmap_can_optimize +#endif + #else static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) @@ -3691,6 +3964,7 @@ enum mf_flags { MF_UNPOISON = 1 << 4, MF_SW_SIMULATED = 1 << 5, MF_NO_RETRY = 1 << 6, + MF_MEM_PRE_REMOVE = 1 << 7, }; int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); @@ -3860,25 +4134,26 @@ static inline void mem_dump_obj(void *object) {} #endif /** - * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and + * handle them. * @seals: the seals to check * @vma: the vma to operate on * - * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on - * the vma flags. Return 0 if check pass, or <0 for errors. + * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper + * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. */ -static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +static inline int seal_check_write(int seals, struct vm_area_struct *vma) { - if (seals & F_SEAL_FUTURE_WRITE) { + if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { /* * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. + * write seals are active. */ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) return -EPERM; /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as * MAP_SHARED and read-only, take care to not allow mprotect to * revert protections on such mappings. Do this only for shared * mappings. For private mappings, don't need to mask @@ -3922,4 +4197,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) #endif +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 21d6c72bcc71..f4fe593c1400 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include <linux/atomic.h> #include <linux/huge_mm.h> +#include <linux/mm_types.h> #include <linux/swap.h> #include <linux/string.h> #include <linux/userfaultfd_k.h> @@ -231,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; @@ -352,15 +358,6 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) } #ifdef CONFIG_ANON_VMA_NAME -/* - * mmap_lock should be read-locked when calling anon_vma_name(). Caller should - * either keep holding the lock while using the returned pointer or it should - * raise anon_vma_name refcount before releasing the lock. - */ -extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); -extern struct anon_vma_name *anon_vma_name_alloc(const char *name); -extern void anon_vma_name_free(struct kref *kref); - /* mmap_lock should be read-locked */ static inline void anon_vma_name_get(struct anon_vma_name *anon_name) { @@ -415,16 +412,6 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, } #else /* CONFIG_ANON_VMA_NAME */ -static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) -{ - return NULL; -} - -static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) -{ - return NULL; -} - static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, @@ -523,6 +510,27 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 1; } +#ifdef CONFIG_MMU +/* + * Computes the pte marker to copy from the given source entry into dst_vma. + * If no marker should be copied, returns 0. + * The caller should insert a new pte created with make_pte_marker(). + */ +static inline pte_marker copy_pte_marker( + swp_entry_t entry, struct vm_area_struct *dst_vma) +{ + pte_marker srcm = pte_marker_get(entry); + /* Always copy error entries. */ + pte_marker dstm = srcm & PTE_MARKER_POISONED; + + /* Only copy PTE markers if UFFD register matches. */ + if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) + dstm |= PTE_MARKER_UFFD_WP; + + return dstm; +} +#endif + /* * If this pte is wr-protected by uffd-wp in any form, arm the special pte to * replace a none pte. NOTE! This should only be called when *pte is already diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7d30dc4ff0ff..5240bd7bca33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -125,36 +125,11 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Page table pages */ - unsigned long _pt_pad_1; /* compound_head */ - pgtable_t pmd_huge_pte; /* protected by page->ptl */ - unsigned long _pt_pad_2; /* mapping */ - union { - struct mm_struct *pt_mm; /* x86 pgds only */ - atomic_t pt_frag_refcount; /* powerpc */ - }; -#if ALLOC_SPLIT_PTLOCKS - spinlock_t *ptl; -#else - spinlock_t ptl; -#endif - }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; @@ -213,6 +188,10 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif + #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: @@ -224,10 +203,6 @@ struct page { struct page *kmsan_shadow; struct page *kmsan_origin; #endif - -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS - int _last_cpupid; -#endif } _struct_page_alignment; /* @@ -235,8 +210,8 @@ struct page { * * An 'encoded_page' pointer is a pointer to a regular 'struct page', but * with the low bits of the pointer indicating extra context-dependent - * information. Not super-common, but happens in mmu_gather and mlock - * handling, and this acts as a type system check on that use. + * information. Only used in mmu_gather handling, and this acts as a type + * system check on that use. * * We only really have two guaranteed bits in general, although you could * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) @@ -245,23 +220,56 @@ struct page { * Use the supplied helper functions to endcode/decode the pointer and bits. */ struct encoded_page; -#define ENCODE_PAGE_BITS 3ul + +#define ENCODED_PAGE_BITS 3ul + +/* Perform rmap removal after we have flushed the TLB. */ +#define ENCODED_PAGE_BIT_DELAY_RMAP 1ul + +/* + * The next item in an encoded_page array is the "nr_pages" argument, specifying + * the number of consecutive pages starting from this page, that all belong to + * the same folio. For example, "nr_pages" corresponds to the number of folio + * references that must be dropped. If this bit is not set, "nr_pages" is + * implicitly 1. + */ +#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul + static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { - BUILD_BUG_ON(flags > ENCODE_PAGE_BITS); + BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); return (struct encoded_page *)(flags | (unsigned long)page); } static inline unsigned long encoded_page_flags(struct encoded_page *page) { - return ENCODE_PAGE_BITS & (unsigned long)page; + return ENCODED_PAGE_BITS & (unsigned long)page; } static inline struct page *encoded_page_ptr(struct encoded_page *page) { - return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); + return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); } +static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) +{ + VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); + return (struct encoded_page *)(nr << 2); +} + +static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) +{ + return ((unsigned long)page) >> 2; +} + +/* + * A swap entry has to fit into a "unsigned long", as the entry is hidden + * in the "index" field of the swapper address space. + */ +typedef struct { + unsigned long val; +} swp_entry_t; + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -272,14 +280,14 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. * @private: Filesystem per-folio data (see folio_attach_private()). - * Used for swp_entry_t if folio_test_swapcache(). + * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to * find out how many times this folio is mapped by userspace. * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. - * @_folio_dtor: Which destructor to use for this folio. - * @_folio_order: Do not use directly, call folio_order(). + * @virtual: Virtual address in the kernel direct map. + * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). @@ -317,12 +325,21 @@ struct folio { }; struct address_space *mapping; pgoff_t index; - void *private; + union { + void *private; + swp_entry_t swap; + }; atomic_t _mapcount; atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif +#if defined(WANT_PAGE_VIRTUAL) + void *virtual; +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS + int _last_cpupid; +#endif /* private: the union with struct page is transitional */ }; struct page page; @@ -331,9 +348,8 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + unsigned long _folio_avail; /* public: */ - unsigned char _folio_dtor; - unsigned char _folio_order; atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; @@ -379,6 +395,12 @@ FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif +#if defined(WANT_PAGE_VIRTUAL) +FOLIO_MATCH(virtual, virtual); +#endif +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +FOLIO_MATCH(_last_cpupid, _last_cpupid); +#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ @@ -391,8 +413,92 @@ FOLIO_MATCH(compound_head, _head_1); offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); +FOLIO_MATCH(flags, _flags_2a); +FOLIO_MATCH(compound_head, _head_2a); #undef FOLIO_MATCH +/** + * struct ptdesc - Memory descriptor for page tables. + * @__page_flags: Same as page flags. Powerpc only. + * @pt_rcu_head: For freeing page table pages. + * @pt_list: List of used page tables. Used for s390 and x86. + * @_pt_pad_1: Padding that aliases with page's compound head. + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. + * @__page_mapping: Aliases with page->mapping. Unused for page tables. + * @pt_index: Used for s390 gmap. + * @pt_mm: Used for x86 pgds. + * @pt_frag_refcount: For fragmented page table tracking. Powerpc only. + * @_pt_pad_2: Padding to ensure proper alignment. + * @ptl: Lock for the page table. + * @__page_type: Same as page->page_type. Unused for page tables. + * @__page_refcount: Same as page refcount. + * @pt_memcg_data: Memcg data. Tracked for page tables here. + * + * This struct overlays struct page for now. Do not modify without a good + * understanding of the issues. + */ +struct ptdesc { + unsigned long __page_flags; + + union { + struct rcu_head pt_rcu_head; + struct list_head pt_list; + struct { + unsigned long _pt_pad_1; + pgtable_t pmd_huge_pte; + }; + }; + unsigned long __page_mapping; + + union { + pgoff_t pt_index; + struct mm_struct *pt_mm; + atomic_t pt_frag_refcount; + }; + + union { + unsigned long _pt_pad_2; +#if ALLOC_SPLIT_PTLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif + }; + unsigned int __page_type; + atomic_t __page_refcount; +#ifdef CONFIG_MEMCG + unsigned long pt_memcg_data; +#endif +}; + +#define TABLE_MATCH(pg, pt) \ + static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) +TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(compound_head, pt_list); +TABLE_MATCH(compound_head, _pt_pad_1); +TABLE_MATCH(mapping, __page_mapping); +TABLE_MATCH(index, pt_index); +TABLE_MATCH(rcu_head, pt_rcu_head); +TABLE_MATCH(page_type, __page_type); +TABLE_MATCH(_refcount, __page_refcount); +#ifdef CONFIG_MEMCG +TABLE_MATCH(memcg_data, pt_memcg_data); +#endif +#undef TABLE_MATCH +static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); + +#define ptdesc_page(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct page *)(pt), \ + struct ptdesc *: (struct page *)(pt))) + +#define ptdesc_folio(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct folio *)(pt), \ + struct ptdesc *: (struct folio *)(pt))) + +#define page_ptdesc(p) (_Generic((p), \ + const struct page *: (const struct ptdesc *)(p), \ + struct page *: (struct ptdesc *)(p))) + /* * Used for sizing the vmemmap region on some architectures */ @@ -471,14 +577,65 @@ struct anon_vma_name { char name[]; }; +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. + */ +struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +struct anon_vma_name *anon_vma_name_alloc(const char *name); +void anon_vma_name_free(struct kref *kref); +#else /* CONFIG_ANON_VMA_NAME */ +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + return NULL; +} +#endif + struct vma_lock { struct rw_semaphore lock; }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; - unsigned long next_pid_reset; - unsigned long access_pids[2]; + + /* + * Time in jiffies when pids_active[] is reset to + * detect phase change behaviour: + */ + unsigned long pids_active_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ + unsigned long pids_active[2]; + + /* MM scan sequence ID when scan first started after VMA creation */ + int start_scan_seq; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* @@ -587,6 +744,12 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; +#ifdef CONFIG_NUMA +#define vma_policy(vma) ((vma)->vm_policy) +#else +#define vma_policy(vma) NULL +#endif + #ifdef CONFIG_SCHED_MM_CID struct mm_cid { u64 time; @@ -595,6 +758,7 @@ struct mm_cid { #endif struct kioctx_table; +struct iommu_mm_data; struct mm_struct { struct { /* @@ -806,13 +970,13 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SVA - u32 pasid; +#ifdef CONFIG_IOMMU_MM_DATA + struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* * Represent how many pages of this process are involved in KSM - * merging. + * merging (not including ksm_zero_pages). */ unsigned long ksm_merging_pages; /* @@ -820,8 +984,13 @@ struct mm_struct { * including merged and not merged. */ unsigned long ksm_rmap_items; -#endif -#ifdef CONFIG_LRU_GEN + /* + * Represent how many empty pages are merged with kernel zero + * pages when enabling KSM use_zero_pages. + */ + unsigned long ksm_zero_pages; +#endif /* CONFIG_KSM */ +#ifdef CONFIG_LRU_GEN_WALKS_MMU struct { /* this mm_struct is on lru_gen_mm_list */ struct list_head list; @@ -836,7 +1005,7 @@ struct mm_struct { struct mem_cgroup *memcg; #endif } lru_gen; -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_GEN_WALKS_MMU */ } __randomize_layout; /* @@ -874,11 +1043,13 @@ struct lru_gen_mm_list { spinlock_t lock; }; +#endif /* CONFIG_LRU_GEN */ + +#ifdef CONFIG_LRU_GEN_WALKS_MMU + void lru_gen_add_mm(struct mm_struct *mm); void lru_gen_del_mm(struct mm_struct *mm); -#ifdef CONFIG_MEMCG void lru_gen_migrate_mm(struct mm_struct *mm); -#endif static inline void lru_gen_init_mm(struct mm_struct *mm) { @@ -899,7 +1070,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) WRITE_ONCE(mm->lru_gen.bitmap, -1); } -#else /* !CONFIG_LRU_GEN */ +#else /* !CONFIG_LRU_GEN_WALKS_MMU */ static inline void lru_gen_add_mm(struct mm_struct *mm) { @@ -909,11 +1080,9 @@ static inline void lru_gen_del_mm(struct mm_struct *mm) { } -#ifdef CONFIG_MEMCG static inline void lru_gen_migrate_mm(struct mm_struct *mm) { } -#endif static inline void lru_gen_init_mm(struct mm_struct *mm) { @@ -923,7 +1092,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) { } -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_GEN_WALKS_MMU */ struct vma_iterator { struct ma_state mas; @@ -934,7 +1103,8 @@ struct vma_iterator { .mas = { \ .tree = &(__mm)->mm_mt, \ .index = __addr, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ }, \ } @@ -1105,7 +1275,8 @@ enum vm_fault_reason { { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ - { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } + { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \ + { VM_FAULT_COMPLETED, "COMPLETED" } struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ @@ -1139,14 +1310,6 @@ enum tlb_flush_reason { NR_TLB_FLUSH_REASONS, }; - /* - * A swap entry has to fit into a "unsigned long", as the entry is hidden - * in the "index" field of the swapper address space. - */ -typedef struct { - unsigned long val; -} swp_entry_t; - /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 5414b5c6a103..a2f6179b672b 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -9,9 +9,6 @@ */ #include <linux/types.h> -#include <linux/threads.h> -#include <linux/atomic.h> -#include <linux/cpumask.h> #include <asm/page.h> @@ -36,6 +33,8 @@ enum { NR_MM_COUNTERS }; +struct page; + struct page_frag { struct page *page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) @@ -52,8 +51,8 @@ struct tlbflush_unmap_batch { #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* * The arch code makes the following promise: generic code can modify a - * PTE, then call arch_tlbbatch_add_mm() (which internally provides all - * needed barriers), then call arch_tlbbatch_flush(), and the entries + * PTE, then call arch_tlbbatch_add_pending() (which internally provides + * all needed barriers), then call arch_tlbbatch_flush(), and the entries * will be flushed on all CPUs by the time that arch_tlbbatch_flush() * returns. */ diff --git a/include/linux/mman.h b/include/linux/mman.h index cee1e4b566d8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -15,6 +15,9 @@ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif +#ifndef MAP_ABOVE4G +#define MAP_ABOVE4G 0 +#endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif @@ -50,6 +53,7 @@ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ + | MAP_ABOVE4G \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) @@ -152,11 +156,20 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. * diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index e05e167dbd16..8d38dcb6d044 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -73,6 +73,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. + */ static inline void vma_end_write_all(struct mm_struct *mm) { mmap_assert_write_locked(mm); @@ -118,16 +126,6 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) return ret; } -static inline bool mmap_write_trylock(struct mm_struct *mm) -{ - bool ret; - - __mmap_lock_trace_start_locking(mm, true); - ret = down_write_trylock(&mm->mmap_lock) != 0; - __mmap_lock_trace_acquire_returned(mm, true, ret); - return ret; -} - static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..f34407cc2788 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -32,6 +32,7 @@ struct mmc_csd { unsigned int r2w_factor; unsigned int max_dtr; unsigned int erase_size; /* In sectors */ + unsigned int wp_grp_size; unsigned int read_blkbits; unsigned int write_blkbits; unsigned int capacity; @@ -52,9 +53,6 @@ struct mmc_ext_csd { u8 part_config; u8 cache_ctrl; u8 rst_n_function; - u8 max_packed_writes; - u8 max_packed_reads; - u8 packed_event_en; unsigned int part_time; /* Units: ms */ unsigned int sa_timeout; /* Units: 100ns */ unsigned int generic_cmd6_time; /* Units: 10ms */ @@ -295,7 +293,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ @@ -304,6 +304,7 @@ struct mmc_card { unsigned int eg_boundary; /* don't cross erase-group boundaries */ unsigned int erase_arg; /* erase / trim / discard */ u8 erased_byte; /* value of erased bytes */ + unsigned int wp_grp_size; /* write group size in sectors */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 6efec0b9820c..2c7928a50907 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -27,7 +27,6 @@ struct mmc_command { u32 opcode; u32 arg; #define MMC_CMD23_ARG_REL_WR (1 << 31) -#define MMC_CMD23_ARG_PACKED ((0 << 31) | (1 << 30)) #define MMC_CMD23_ARG_TAG_REQ (1 << 29) u32 resp[4]; unsigned int flags; /* expected response type */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 461d1543893b..5894bf912f7b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -184,6 +184,12 @@ struct mmc_host_ops { /* Execute HS400 tuning depending host driver */ int (*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Optional callback to prepare for SD high-speed tuning */ + int (*prepare_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card); + + /* Optional callback to execute SD high-speed tuning */ + int (*execute_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Prepare switch to DDR during the HS400 init sequence */ int (*hs400_prepare_ddr)(struct mmc_host *host); @@ -520,6 +526,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + int hsq_depth; u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; @@ -532,7 +539,7 @@ struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); -void mmc_of_parse_clk_phase(struct mmc_host *host, +void mmc_of_parse_clk_phase(struct device *dev, struct mmc_clk_phase_map *map); int mmc_of_parse(struct mmc_host *host); int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask); @@ -665,6 +672,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } +int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp); +int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 6f7993803ee7..cf2bcb5da30d 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -257,8 +257,6 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_FLUSH_CACHE 32 /* W */ #define EXT_CSD_CACHE_CTRL 33 /* R/W */ #define EXT_CSD_POWER_OFF_NOTIFICATION 34 /* R/W */ -#define EXT_CSD_PACKED_FAILURE_INDEX 35 /* RO */ -#define EXT_CSD_PACKED_CMD_STATUS 36 /* RO */ #define EXT_CSD_EXP_EVENTS_STATUS 54 /* RO, 2 bytes */ #define EXT_CSD_EXP_EVENTS_CTRL 56 /* R/W, 2 bytes */ #define EXT_CSD_DATA_SECTOR_SIZE 61 /* R */ @@ -321,8 +319,6 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_SUPPORTED_MODE 493 /* RO */ #define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */ #define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */ -#define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */ -#define EXT_CSD_MAX_PACKED_READS 501 /* RO */ #define EXT_CSD_BKOPS_SUPPORT 502 /* RO */ #define EXT_CSD_HPI_FEATURES 503 /* RO */ @@ -402,18 +398,12 @@ static inline bool mmc_ready_for_data(u32 status) #define EXT_CSD_PWR_CL_8BIT_SHIFT 4 #define EXT_CSD_PWR_CL_4BIT_SHIFT 0 -#define EXT_CSD_PACKED_EVENT_EN BIT(3) - /* * EXCEPTION_EVENT_STATUS field */ #define EXT_CSD_URGENT_BKOPS BIT(0) #define EXT_CSD_DYNCAP_NEEDED BIT(1) #define EXT_CSD_SYSPOOL_EXHAUSTED BIT(2) -#define EXT_CSD_PACKED_FAILURE BIT(3) - -#define EXT_CSD_PACKED_GENERIC_ERROR BIT(0) -#define EXT_CSD_PACKED_INDEXED_ERROR BIT(1) /* * BKOPS status level diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 7c3e7b0b0e8f..39a7714605a7 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -10,7 +10,7 @@ struct vm_area_struct; struct mm_struct; struct vma_iterator; -void dump_page(struct page *page, const char *reason); +void dump_page(const struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); void vma_iter_dump_tree(const struct vma_iterator *vmi); diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index f2b7a3f04099..bbaec80c78c5 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -11,7 +11,7 @@ #endif #ifndef leave_mm -static inline void leave_mm(int cpu) { } +static inline void leave_mm(void) { } #endif /* diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 64a3e051c3c4..f349e08a9dfe 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -187,27 +187,27 @@ struct mmu_notifier_ops { const struct mmu_notifier_range *range); /* - * invalidate_range() is either called between - * invalidate_range_start() and invalidate_range_end() when the - * VM has to free pages that where unmapped, but before the - * pages are actually freed, or outside of _start()/_end() when - * a (remote) TLB is necessary. + * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB + * which shares page-tables with the CPU. The + * invalidate_range_start()/end() callbacks should not be implemented as + * invalidate_secondary_tlbs() already catches the points in time when + * an external TLB needs to be flushed. * - * If invalidate_range() is used to manage a non-CPU TLB with - * shared page-tables, it not necessary to implement the - * invalidate_range_start()/end() notifiers, as - * invalidate_range() already catches the points in time when an - * external TLB range needs to be flushed. For more in depth - * discussion on this see Documentation/mm/mmu_notifier.rst + * This requires arch_invalidate_secondary_tlbs() to be called while + * holding the ptl spin-lock and therefore this callback is not allowed + * to sleep. * - * Note that this function might be called with just a sub-range - * of what was passed to invalidate_range_start()/end(), if - * called between those functions. + * This is called by architecture code whenever invalidating a TLB + * entry. It is assumed that any secondary TLB has the same rules for + * when invalidations are required. If this is not the case architecture + * code will need to call this explicitly when required for secondary + * TLB invalidation. */ - void (*invalidate_range)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + void (*arch_invalidate_secondary_tlbs)( + struct mmu_notifier *subscription, + struct mm_struct *mm, + unsigned long start, + unsigned long end); /* * These callbacks are used with the get/put interface to manage the @@ -395,10 +395,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); -extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r, - bool only_end); -extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, - unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); +extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, + unsigned long start, unsigned long end); extern bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); @@ -460,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) lock_map_release(&__mmu_notifier_invalidate_range_start_map); } -static inline int +/* + * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it + * can return an error if a notifier can't proceed without blocking, in which + * case you're not allowed to modify PTEs in the specified range. + * + * This is mainly intended for OOM handling. + */ +static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; @@ -481,21 +487,14 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) might_sleep(); if (mm_has_notifiers(range->mm)) - __mmu_notifier_invalidate_range_end(range, false); -} - -static inline void -mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) -{ - if (mm_has_notifiers(range->mm)) - __mmu_notifier_invalidate_range_end(range, true); + __mmu_notifier_invalidate_range_end(range); } -static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, + unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range(mm, start, end); + __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) @@ -582,45 +581,6 @@ static inline void mmu_notifier_range_init_owner( __young; \ }) -#define ptep_clear_flush_notify(__vma, __address, __ptep) \ -({ \ - unsigned long ___addr = __address & PAGE_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pte_t ___pte; \ - \ - ___pte = ptep_clear_flush(__vma, __address, __ptep); \ - mmu_notifier_invalidate_range(___mm, ___addr, \ - ___addr + PAGE_SIZE); \ - \ - ___pte; \ -}) - -#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ - mmu_notifier_invalidate_range(___mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - -#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \ - struct mm_struct *___mm = (__vma)->vm_mm; \ - pud_t ___pud; \ - \ - ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \ - mmu_notifier_invalidate_range(___mm, ___haddr, \ - ___haddr + HPAGE_PUD_SIZE); \ - \ - ___pud; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -711,12 +671,7 @@ void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } -static inline void -mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) -{ -} - -static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, +static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5e50b78d58ea..c11b7cde81ef 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -22,18 +22,21 @@ #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/local_lock.h> +#include <linux/zswap.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER -#define MAX_ORDER 10 +#define MAX_PAGE_ORDER 10 #else -#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER +#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif -#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) +#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should @@ -73,9 +76,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) +# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ + get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false +# define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) @@ -95,7 +101,7 @@ static inline bool migratetype_is_mergeable(int mt) } #define for_each_migratetype_order(order, type) \ - for (order = 0; order <= MAX_ORDER; order++) \ + for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; @@ -207,6 +213,10 @@ enum node_stat_item { PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif + /* PGDEMOTE_*: pages demoted */ + PGDEMOTE_KSWAPD, + PGDEMOTE_DIRECT, + PGDEMOTE_KHUGEPAGED, NR_VM_NODE_STAT_ITEMS }; @@ -435,14 +445,12 @@ struct lru_gen_folio { atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; -#ifdef CONFIG_MEMCG /* the memcg generation this lru_gen_folio belongs to */ u8 gen; /* the list segment this lru_gen_folio belongs to */ u8 seg; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_node list; -#endif }; enum { @@ -459,7 +467,7 @@ enum { #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { - /* set to max_seq after each iteration */ + /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; @@ -474,8 +482,8 @@ struct lru_gen_mm_state { struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; - /* unstable max_seq from lru_gen_folio */ - unsigned long max_seq; + /* max_seq from lru_gen_folio: can be out of date */ + unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ @@ -488,11 +496,6 @@ struct lru_gen_mm_walk { bool force_scan; }; -void lru_gen_init_lruvec(struct lruvec *lruvec); -void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); - -#ifdef CONFIG_MEMCG - /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins @@ -505,33 +508,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { @@ -546,6 +553,8 @@ struct lru_gen_memcg { }; void lru_gen_init_pgdat(struct pglist_data *pgdat); +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); @@ -554,19 +563,6 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); -#else /* !CONFIG_MEMCG */ - -#define MEMCG_NR_GENS 1 - -struct lru_gen_memcg { -}; - -static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) -{ -} - -#endif /* CONFIG_MEMCG */ - #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) @@ -581,8 +577,6 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { } -#ifdef CONFIG_MEMCG - static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } @@ -607,8 +601,6 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } -#endif /* CONFIG_MEMCG */ - #endif /* CONFIG_LRU_GEN */ struct lruvec { @@ -631,16 +623,17 @@ struct lruvec { #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ struct lru_gen_folio lrugen; +#ifdef CONFIG_LRU_GEN_WALKS_MMU /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif +#endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif + struct zswap_lruvec_state zswap_lruvec_state; }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ @@ -676,16 +669,34 @@ enum zone_watermarks { #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) -/* Fields and list protected by pagesets local_lock in page_alloc.c */ +/* + * Flags used in pcp->flags field. + * + * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the + * previous page freeing. To avoid to drain PCP for an accident + * high-order page freeing. + * + * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before + * draining PCP for consecutive high-order pages freeing without + * allocation if data cache slice of CPU is large enough. To reduce + * zone lock contention and keep cache-hot pages reusing. + */ +#define PCPF_PREV_FREE_HIGH_ORDER BIT(0) +#define PCPF_FREE_HIGH_BATCH BIT(1) + struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ + int high_min; /* min high watermark */ + int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ - short free_factor; /* batch scaling factor during free */ + u8 flags; /* protected by pcp->lock */ + u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA - short expire; /* When 0, remote pagesets are drained */ + u8 expire; /* When 0, remote pagesets are drained */ #endif + short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; @@ -838,7 +849,8 @@ struct zone { * the high and batch values are copied to individual pagesets for * faster access */ - int pageset_high; + int pageset_high_min; + int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM @@ -926,10 +938,10 @@ struct zone { CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ - struct free_area free_area[MAX_ORDER + 1]; + struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY - /* Pages to be accepted. All pages on the list are MAX_ORDER */ + /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif @@ -999,6 +1011,7 @@ enum zone_flags { * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ + ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; static inline unsigned long zone_managed_pages(struct zone *zone) @@ -1738,8 +1751,8 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) -#if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS -#error Allocator MAX_ORDER exceeds SECTION_SIZE +#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS +#error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) @@ -1771,6 +1784,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { + struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif @@ -1964,7 +1978,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); - return test_bit(idx, ms->usage->subsection_map); + return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) @@ -1988,6 +2002,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the @@ -2001,13 +2016,19 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - if (!valid_section(ms)) + rcu_read_lock_sched(); + if (!valid_section(ms)) { + rcu_read_unlock_sched(); return 0; + } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ - return early_section(ms) || pfn_section_valid(ms, pfn); + ret = early_section(ms) || pfn_section_valid(ms, pfn); + rcu_read_unlock_sched(); + + return ret; } #endif diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..cd4d5c8781f5 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); @@ -241,7 +244,4 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } -bool check_fsmapping(const struct mnt_idmap *idmap, - const struct super_block *sb); - #endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b0678b093cb2..7a9a07ea451b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -935,6 +935,12 @@ enum { * struct cdx_device_id - CDX device identifier * @vendor: Vendor ID * @device: Device ID + * @subvendor: Subsystem vendor ID (or CDX_ANY_ID) + * @subdevice: Subsystem device ID (or CDX_ANY_ID) + * @class: Device class + * Most drivers do not need to specify class/class_mask + * as vendor/device is normally sufficient. + * @class_mask: Limit which sub-fields of the class field are compared. * @override_only: Match only when dev->driver_override is this driver. * * Type of entries in the "device Id" table for CDX devices supported by @@ -943,7 +949,25 @@ enum { struct cdx_device_id { __u16 vendor; __u16 device; + __u16 subvendor; + __u16 subdevice; + __u32 class; + __u32 class_mask; __u32 override_only; }; +struct vchiq_device_id { + char name[32]; +}; + +/** + * struct coreboot_device_id - Identifies a coreboot table entry + * @tag: tag ID + * @driver_data: driver specific data + */ +struct coreboot_device_id { + __u32 tag; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module.h b/include/linux/module.h index a98e188cf37b..1153b0d99a80 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -540,6 +540,8 @@ struct module { struct static_call_site *static_call_sites; #endif #if IS_ENABLED(CONFIG_KUNIT) + int num_kunit_init_suites; + struct kunit_suite **kunit_init_suites; int num_kunit_suites; struct kunit_suite **kunit_suites; #endif @@ -668,7 +670,7 @@ extern void __module_get(struct module *module); * @module: the module we should check for * * Only try to get a module reference count if the module is not being removed. - * This call will fail if the module is already being removed. + * This call will fail if the module is in the process of being removed. * * Care must also be taken to ensure the module exists and is alive prior to * usage of this call. This can be gauranteed through two means: @@ -883,7 +885,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h index 7ace7ba30203..77c9895b9ddb 100644 --- a/include/linux/module_symbol.h +++ b/include/linux/module_symbol.h @@ -3,15 +3,13 @@ #define _LINUX_MODULE_SYMBOL_H /* This ignores the intensely annoying "mapping symbols" found in ELF files. */ -static inline int is_mapping_symbol(const char *str) +static inline bool is_mapping_symbol(const char *str) { if (str[0] == '.' && str[1] == 'L') return true; if (str[0] == 'L' && str[1] == '0') return true; - return str[0] == '$' && - (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x') - && (str[2] == '\0' || str[2] == '.'); + return str[0] == '$'; } #endif /* _LINUX_MODULE_SYMBOL_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 03be088fb439..89b1e0ed9811 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -42,6 +42,11 @@ bool module_init_section(const char *name); */ bool module_exit_section(const char *name); +/* Describes whether within_module_init() will consider this an init section + * or not. This behaviour changes with CONFIG_MODULE_UNLOAD. + */ +bool module_init_layout_section(const char *sname); + /* * Apply the given relocation to the (simplified) ELF. Return -error * or 0. @@ -110,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 962cd41a2cb5..bfb85fd13e1f 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const @@ -293,7 +293,11 @@ struct kparam_array = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -/* Obsolete - use module_param_cb() */ +/* + * Useful for describing a set/get pair used only once (i.e. for this + * parameter). For repeated set/get pairs (i.e. the same struct + * kernel_param_ops), use module_param_cb() instead. + */ #define module_param_call(name, _set, _get, arg, perm) \ static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, .set = _set, .get = _get }; \ @@ -381,6 +385,8 @@ extern bool parameq(const char *name1, const char *name2); */ extern bool parameqn(const char *name1, const char *name2, size_t n); +typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg); + /* Called on module insert or kernel boot */ extern char *parse_args(const char *name, char *args, @@ -388,9 +394,7 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, - void *arg, - int (*unknown)(char *param, char *val, - const char *doing, void *arg)); + void *arg, parse_unknown_fn unknown); /* Called by module remove. */ #ifdef CONFIG_SYSFS diff --git a/include/linux/mount.h b/include/linux/mount.h index 4f40b40306d0..c34c18b4e8f3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -50,8 +50,7 @@ struct path; #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \ - MNT_CURSOR) + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB) #define MNT_INTERNAL 0x4000 @@ -65,7 +64,7 @@ struct path; #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 -#define MNT_CURSOR 0x10000000 +#define MNT_ONRB 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ @@ -92,8 +91,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -extern int __mnt_want_write(struct vfsmount *); -extern void __mnt_drop_write(struct vfsmount *); +int mnt_get_write_access(struct vfsmount *mnt); +void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h index 79184948fab4..ac577699edfd 100644 --- a/include/linux/moxtet.h +++ b/include/linux/moxtet.h @@ -35,8 +35,6 @@ enum turris_mox_module_id { #define MOXTET_NIRQS 16 -extern struct bus_type moxtet_type; - struct moxtet { struct device *dev; struct mutex lock; diff --git a/include/linux/msi.h b/include/linux/msi.h index a50ea79522f8..26d07e23052e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev); struct irq_domain; struct irq_domain_ops; struct irq_chip; +struct irq_fwspec; struct device_node; struct fwnode_handle; struct msi_domain_info; @@ -431,6 +432,8 @@ struct msi_domain_info; * function. * @msi_post_free: Optional function which is invoked after freeing * all interrupts. + * @msi_translate: Optional translate callback to support the odd wire to + * MSI bridges, e.g. MBIGEN * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. @@ -468,6 +471,8 @@ struct msi_domain_ops { struct device *dev); void (*msi_post_free)(struct irq_domain *domain, struct device *dev); + int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type); }; /** @@ -547,12 +552,10 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but has to be partially - * handled in the core MSI code. - */ - MSI_FLAG_NOMASK_QUIRK = (1 << 7), + /* Use dev->fwnode for MSI device domain creation */ + MSI_FLAG_USE_DEV_FWNODE = (1 << 7), + /* Set parent->dev into domain->pm_dev on device domain creation */ + MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), @@ -578,6 +581,11 @@ enum { * struct msi_parent_ops - MSI parent domain callbacks and configuration info * * @supported_flags: Required: The supported MSI flags of the parent domain + * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @bus_select_token: Optional: The bus token of the real parent domain for + * irq_domain::select() + * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for + * irq_domain::select() * @prefix: Optional: Prefix for the domain and chip name * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent * domain specific domain flags, domain ops and interrupt chip @@ -585,6 +593,9 @@ enum { */ struct msi_parent_ops { u32 supported_flags; + u32 required_flags; + u32 bus_select_token; + u32 bus_select_mask; const char *prefix; bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, struct irq_domain *msi_parent_domain, @@ -633,9 +644,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg); -void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -662,6 +670,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); +/* Per device platform MSI */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_device_msi_free_irqs_all(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev); #else /* CONFIG_GENERIC_MSI_IRQ */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d88bb56c18e2..947410faf9e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -287,7 +287,7 @@ struct cfi_private { unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ + struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index c04f690871ca..9798c1a1d3b6 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -13,6 +13,7 @@ */ #include <linux/sched.h> #include <linux/mutex.h> +#include <linux/wait.h> typedef enum { FL_READY, diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/lpc32xx_mlc.h b/include/linux/mtd/lpc32xx_mlc.h index d168c628c0d5..35e971be0950 100644 --- a/include/linux/mtd/lpc32xx_mlc.h +++ b/include/linux/mtd/lpc32xx_mlc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_mlc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_MLC_H */ diff --git a/include/linux/mtd/lpc32xx_slc.h b/include/linux/mtd/lpc32xx_slc.h index cf54a9f80460..a044b806566b 100644 --- a/include/linux/mtd/lpc32xx_slc.h +++ b/include/linux/mtd/lpc32xx_slc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_slc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_SLC_H */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 7c58c44662b8..8d10d9d2e830 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -223,7 +223,7 @@ struct mtd_part { * @partitions_lock: lock protecting accesses to the partition list. Protects * not only the master partition list, but also all * sub-partitions. - * @suspended: et to 1 when the device is suspended, 0 otherwise + * @suspended: set to 1 when the device is suspended, 0 otherwise * * This struct is embedded in mtd_info and contains master-specific * properties/fields. The master is the root MTD device from the MTD partition @@ -379,7 +379,7 @@ struct mtd_info { struct module *owner; struct device dev; - int usecount; + struct kref refcnt; struct mtd_debug_info dbg; struct nvmem_device *nvmem; struct nvmem_device *otp_user_nvmem; diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 2e3f43788d48..0421f12156b5 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[]; + struct flchip chips[] __counted_by(numchips); }; /* qinfo_query_info structure contains request information for diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5159d692f9ce..e84522e31301 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); @@ -1001,6 +1003,8 @@ struct nand_op_parser { /** * struct nand_operation - NAND operation descriptor * @cs: the CS line to select for this NAND operation + * @deassert_wp: set to true when the operation requires the WP pin to be + * de-asserted (ERASE, PROG, ...) * @instrs: array of instructions to execute * @ninstrs: length of the @instrs array * @@ -1008,6 +1012,7 @@ struct nand_op_parser { */ struct nand_operation { unsigned int cs; + bool deassert_wp; const struct nand_op_instr *instrs; unsigned int ninstrs; }; @@ -1019,6 +1024,14 @@ struct nand_operation { .ninstrs = ARRAY_SIZE(_instrs), \ } +#define NAND_DESTRUCTIVE_OPERATION(_cs, _instrs) \ + { \ + .cs = _cs, \ + .deassert_wp = true, \ + .instrs = _instrs, \ + .ninstrs = ARRAY_SIZE(_instrs), \ + } + int nand_op_parser_exec_op(struct nand_chip *chip, const struct nand_op_parser *parser, const struct nand_operation *op, bool check_only); @@ -1102,6 +1115,7 @@ struct nand_controller_ops { * the bus without restarting an entire read operation nor * changing the column. * @supported_op.cont_read: The controller supports sequential cache reads. + * @controller_wp: the controller is in charge of handling the WP pin. */ struct nand_controller { struct mutex lock; @@ -1110,6 +1124,7 @@ struct nand_controller { unsigned int data_only_read: 1; unsigned int cont_read: 1; } supported_op; + bool controller_wp; }; static inline void nand_controller_init(struct nand_controller *nfc) @@ -1263,6 +1278,7 @@ struct nand_secure_region { * @cont_read: Sequential page read internals * @cont_read.ongoing: Whether a continuous read is ongoing or not * @cont_read.first_page: Start of the continuous read operation + * @cont_read.pause_page: End of the current sequential cache read operation * @cont_read.last_page: End of the continuous read operation * @controller: The hardware controller structure which is shared among multiple * independent devices @@ -1319,6 +1335,7 @@ struct nand_chip { struct { bool ongoing; unsigned int first_page; + unsigned int pause_page; unsigned int last_page; } cont_read; @@ -1540,6 +1557,7 @@ int nand_reset_op(struct nand_chip *chip); int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, unsigned int len); int nand_status_op(struct nand_chip *chip, u8 *status); +int nand_exit_status_op(struct nand_chip *chip); int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock); int nand_read_page_op(struct nand_chip *chip, unsigned int page, unsigned int offset_in_page, void *buf, unsigned int len); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3e285c09d16d..5c19ead60499 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -169,7 +169,7 @@ struct spinand_op; struct spinand_device; -#define SPINAND_MAX_ID_LEN 4 +#define SPINAND_MAX_ID_LEN 5 /* * For erase, write and read operation, we got the following timings : * tBERS (erase) 1ms to 4ms @@ -263,6 +263,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index a529347fd75b..562f92504f2b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -192,6 +192,7 @@ struct ubi_device_info { * or a volume was removed) * @UBI_VOLUME_RESIZED: a volume has been re-sized * @UBI_VOLUME_RENAMED: a volume has been re-named + * @UBI_VOLUME_SHUTDOWN: a volume is going to removed, shutdown users * @UBI_VOLUME_UPDATED: data has been written to a volume * * These constants define which type of event has happened when a volume @@ -202,6 +203,7 @@ enum { UBI_VOLUME_REMOVED, UBI_VOLUME_RESIZED, UBI_VOLUME_RENAMED, + UBI_VOLUME_SHUTDOWN, UBI_VOLUME_UPDATED, }; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a33aa9eb9fc3..67edc4ca2bee 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,7 @@ #include <linux/osq_lock.h> #include <linux/debug_locks.h> #include <linux/cleanup.h> +#include <linux/mutex_types.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -31,54 +32,9 @@ # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif -#ifndef CONFIG_PREEMPT_RT - -/* - * Simple, straightforward mutexes with strict semantics: - * - * - only one task can hold the mutex at a time - * - only the owner can unlock the mutex - * - multiple unlocks are not permitted - * - recursive locking is not permitted - * - a mutex object must be initialized via the API - * - a mutex object must not be initialized via memset or copying - * - task may not exit with mutex held - * - memory areas where held locks reside must not be freed - * - held mutexes must not be reinitialized - * - mutexes may not be used in hardware or software interrupt - * contexts such as tasklets and timers - * - * These semantics are fully enforced when DEBUG_MUTEXES is - * enabled. Furthermore, besides enforcing the above rules, the mutex - * debugging code also implements a number of additional features - * that make lock debugging easier and faster: - * - * - uses symbolic names of mutexes, whenever they are printed in debug output - * - point-of-acquire tracking, symbolic lookup of function names - * - list of all locks held in the system, printout of them - * - owner tracking - * - detects self-recursing locks and prints out all relevant info - * - detects multi-task circular deadlocks and prints out all affected - * locks and tasks (and only those tasks) - */ -struct mutex { - atomic_long_t owner; - raw_spinlock_t wait_lock; -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_queue osq; /* Spinner MCS lock */ -#endif - struct list_head wait_list; -#ifdef CONFIG_DEBUG_MUTEXES - void *magic; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - #ifdef CONFIG_DEBUG_MUTEXES -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ +# define __DEBUG_MUTEX_INITIALIZER(lockname) \ , .magic = &lockname extern void mutex_destroy(struct mutex *lock); @@ -91,6 +47,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif +#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -131,14 +88,6 @@ extern bool mutex_is_locked(struct mutex *lock); /* * Preempt-RT variant based on rtmutexes. */ -#include <linux/rtmutex.h> - -struct mutex { - struct rt_mutex_base rtmutex; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; #define __MUTEX_INITIALIZER(mutexname) \ { \ @@ -151,9 +100,6 @@ struct mutex { extern void __mutex_rt_init(struct mutex *lock, const char *name, struct lock_class_key *key); -extern int mutex_trylock(struct mutex *lock); - -static inline void mutex_destroy(struct mutex *lock) { } #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) @@ -221,6 +167,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) +DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h new file mode 100644 index 000000000000..fdf7f515fde8 --- /dev/null +++ b/include/linux/mutex_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_MUTEX_TYPES_H +#define __LINUX_MUTEX_TYPES_H + +#include <linux/atomic.h> +#include <linux/lockdep_types.h> +#include <linux/osq_lock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> + +#ifndef CONFIG_PREEMPT_RT + +/* + * Simple, straightforward mutexes with strict semantics: + * + * - only one task can hold the mutex at a time + * - only the owner can unlock the mutex + * - multiple unlocks are not permitted + * - recursive locking is not permitted + * - a mutex object must be initialized via the API + * - a mutex object must not be initialized via memset or copying + * - task may not exit with mutex held + * - memory areas where held locks reside must not be freed + * - held mutexes must not be reinitialized + * - mutexes may not be used in hardware or software interrupt + * contexts such as tasklets and timers + * + * These semantics are fully enforced when DEBUG_MUTEXES is + * enabled. Furthermore, besides enforcing the above rules, the mutex + * debugging code also implements a number of additional features + * that make lock debugging easier and faster: + * + * - uses symbolic names of mutexes, whenever they are printed in debug output + * - point-of-acquire tracking, symbolic lookup of function names + * - list of all locks held in the system, printout of them + * - owner tracking + * - detects self-recursing locks and prints out all relevant info + * - detects multi-task circular deadlocks and prints out all affected + * locks and tasks (and only those tasks) + */ +struct mutex { + atomic_long_t owner; + raw_spinlock_t wait_lock; +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + struct optimistic_spin_queue osq; /* Spinner MCS lock */ +#endif + struct list_head wait_list; +#ifdef CONFIG_DEBUG_MUTEXES + void *magic; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#else /* !CONFIG_PREEMPT_RT */ +/* + * Preempt-RT variant based on rtmutexes. + */ +#include <linux/rtmutex.h> + +struct mutex { + struct rt_mutex_base rtmutex; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#endif /* CONFIG_PREEMPT_RT */ + +#endif /* __LINUX_MUTEX_TYPES_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 1463cbda4888..74e0cc14ebf8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -66,6 +66,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); @@ -92,6 +93,30 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +/** + * mode_strip_umask - handle vfs umask stripping + * @dir: parent directory of the new inode + * @mode: mode of the new inode to be created in @dir + * + * In most filesystems, umask stripping depends on whether or not the + * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask + * stripping is done directly in here. If the filesystem does support POSIX + * ACLs umask stripping is deferred until the filesystem calls + * posix_acl_create(). + * + * Some filesystems (like NFSv4) also want to avoid umask stripping by the + * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK + * to get this effect without declaring that they support POSIX ACLs. + * + * Returns: mode + */ +static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) +{ + if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) + mode &= ~current_umask(); + return mode; +} + extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) @@ -112,7 +137,7 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) static inline bool retry_estale(const long error, const unsigned int flags) { - return error == -ESTALE && !(flags & LOOKUP_REVAL); + return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 41c608c1b02c..15df6d5f27a7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -123,7 +123,7 @@ struct socket { struct file *file; struct sock *sk; - const struct proto_ops *ops; + const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */ struct socket_wq wq; }; @@ -299,10 +299,7 @@ do { \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...) \ - do { \ - if (0) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ - } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h index ed42bd5f639f..0aa4411528fc 100644 --- a/include/linux/net/intel/i40e_client.h +++ b/include/linux/net/intel/i40e_client.h @@ -45,7 +45,7 @@ struct i40e_qv_info { struct i40e_qvlist_info { u32 num_vectors; - struct i40e_qv_info qv_info[]; + struct i40e_qv_info qv_info[] __counted_by(num_vectors); }; diff --git a/include/linux/net_mm.h b/include/linux/net_mm.h deleted file mode 100644 index b298998bd5a0..000000000000 --- a/include/linux/net_mm.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifdef CONFIG_MMU - -#ifdef CONFIG_INET -extern const struct vm_operations_struct tcp_vm_ops; -static inline bool vma_is_tcp(const struct vm_area_struct *vma) -{ - return vma->vm_ops == &tcp_vm_ops; -} -#else -static inline bool vma_is_tcp(const struct vm_area_struct *vma) -{ - return false; -} -#endif /* CONFIG_INET*/ - -#endif /* CONFIG_MMU */ diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index fd67f3cc0c4b..eb01c37e71e0 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -5,12 +5,23 @@ #include <uapi/linux/net_tstamp.h> +enum hwtstamp_source { + HWTSTAMP_SOURCE_NETDEV, + HWTSTAMP_SOURCE_PHYLIB, +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * * @flags: see struct hwtstamp_config * @tx_type: see struct hwtstamp_config * @rx_filter: see struct hwtstamp_config + * @ifr: pointer to ifreq structure from the original ioctl request, to pass to + * a legacy implementation of a lower driver + * @copied_to_user: request was passed to a legacy implementation which already + * copied the ioctl request back to user space + * @source: indication whether timestamps should come from the netdev or from + * an attached phylib PHY * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -20,6 +31,9 @@ struct kernel_hwtstamp_config { int flags; int tx_type; int rx_filter; + struct ifreq *ifr; + bool copied_to_user; + enum hwtstamp_source source; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, @@ -30,4 +44,20 @@ static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kern kernel_cfg->rx_filter = cfg->rx_filter; } +static inline void hwtstamp_config_from_kernel(struct hwtstamp_config *cfg, + const struct kernel_hwtstamp_config *kernel_cfg) +{ + cfg->flags = kernel_cfg->flags; + cfg->tx_type = kernel_cfg->tx_type; + cfg->rx_filter = kernel_cfg->rx_filter; +} + +static inline bool kernel_hwtstamp_config_changed(const struct kernel_hwtstamp_config *a, + const struct kernel_hwtstamp_config *b) +{ + return a->flags != b->flags || + a->tx_type != b->tx_type || + a->rx_filter != b->rx_filter; +} + #endif /* _LINUX_NET_TIMESTAMPING_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b828c7a75be2..cb37817d6382 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include <net/dcbnl.h> #endif #include <net/netprio_cgroup.h> -#include <net/xdp.h> #include <linux/netdev_features.h> #include <linux/neighbour.h> @@ -57,6 +56,7 @@ struct netpoll_info; struct device; struct ethtool_ops; +struct kernel_hwtstamp_config; struct phy_device; struct dsa_port; struct ip_tunnel_parm; @@ -76,8 +76,12 @@ struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +struct xdp_frame; +struct xdp_metadata_ops; struct xdp_md; +typedef u32 xdp_features_t; + void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -221,12 +225,6 @@ struct net_device_core_stats { #include <linux/cache.h> #include <linux/skbuff.h> -#ifdef CONFIG_RPS -#include <linux/static_key.h> -extern struct static_key_false rps_needed; -extern struct static_key_false rfs_needed; -#endif - struct neighbour; struct neigh_parms; struct sk_buff; @@ -376,6 +374,7 @@ struct napi_struct { /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; + int irq; }; enum { @@ -476,6 +475,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } +/** + * napi_is_scheduled - test if NAPI is scheduled + * @n: NAPI context + * + * This check is "best-effort". With no locking implemented, + * a NAPI can be scheduled or terminate right after this check + * and produce not precise results. + * + * NAPI_STATE_SCHED is an internal state, napi_is_scheduled + * should not be used normally and napi_schedule should be + * used instead. + * + * Use only if the driver really needs to check if a NAPI + * is scheduled for example in the context of delayed timer + * that can be skipped if a NAPI is already scheduled. + * + * Return True if NAPI is scheduled, False otherwise. + */ +static inline bool napi_is_scheduled(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_SCHED, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** @@ -484,11 +506,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** @@ -503,16 +532,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context @@ -639,6 +658,10 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; /* * write-mostly part */ @@ -701,112 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node #endif } -#ifdef CONFIG_RPS -/* - * This structure holds an RPS map which can be of variable length. The - * map is an array of CPUs. - */ -struct rps_map { - unsigned int len; - struct rcu_head rcu; - u16 cpus[]; -}; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) - -/* - * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. - */ -struct rps_dev_flow { - u16 cpu; - u16 filter; - unsigned int last_qtail; -}; -#define RPS_NO_FILTER 0xffff - -/* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - unsigned int mask; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* - * The rps_sock_flow_table contains mappings of flows to the last CPU - * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high-order bits - * of flow hash, lower part is CPU number. - * rps_cpu_mask is used to partition the space, depending on number of - * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, - * meaning we use 32-6=26 bits for the hash. - */ -struct rps_sock_flow_table { - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; -}; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -#define RPS_NO_CPU 0xffff - -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) { - unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; - - /* We only give a hint, preemption can change CPU under us */ - val |= raw_smp_processor_id(); - - /* The following WRITE_ONCE() is paired with the READ_ONCE() - * here, and another one in get_rps_cpu(). - */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); - } -} - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif -#endif /* CONFIG_RPS */ - -/* This structure contains an instance of an RX queue. */ -struct netdev_rx_queue { - struct xdp_rxq_info xdp_rxq; -#ifdef CONFIG_RPS - struct rps_map __rcu *rps_map; - struct rps_dev_flow_table __rcu *rps_flow_table; -#endif - struct kobject kobj; - struct net_device *dev; - netdevice_tracker dev_tracker; - -#ifdef CONFIG_XDP_SOCKETS - struct xsk_buff_pool *pool; -#endif -} ____cacheline_aligned_in_smp; - -/* - * RX queue sysfs structures and functions. - */ -struct rx_queue_attribute { - struct attribute attr; - ssize_t (*show)(struct netdev_rx_queue *queue, char *buf); - ssize_t (*store)(struct netdev_rx_queue *queue, - const char *buf, size_t len); -}; /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { @@ -939,6 +860,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; @@ -1056,7 +978,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); @@ -1309,9 +1231,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. - * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1325,6 +1245,9 @@ struct netdev_net_notifier { * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. + * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], + * struct netlink_ext_ack *extack); + * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink @@ -1418,6 +1341,16 @@ struct netdev_net_notifier { * Get hardware timestamp based on normal/adjustable time or free running * cycle counter. This function is required if physical clock supports a * free running cycle counter. + * + * int (*ndo_hwtstamp_get)(struct net_device *dev, + * struct kernel_hwtstamp_config *kernel_config); + * Get the currently configured hardware timestamping parameters for the + * NIC device. + * + * int (*ndo_hwtstamp_set)(struct net_device *dev, + * struct kernel_hwtstamp_config *kernel_config, + * struct netlink_ext_ack *extack); + * Change the hardware timestamping parameters for NIC device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1576,10 +1509,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1599,9 +1530,16 @@ struct net_device_ops { int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); + int (*ndo_mdb_del_bulk)(struct net_device *dev, + struct nlattr *tb[], + struct netlink_ext_ack *extack); int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); + int (*ndo_mdb_get)(struct net_device *dev, + struct nlattr *tb[], u32 portid, + u32 seq, + struct netlink_ext_ack *extack); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, @@ -1652,12 +1590,11 @@ struct net_device_ops { ktime_t (*ndo_get_tstamp)(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles); -}; - -struct xdp_metadata_ops { - int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); - int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, - enum xdp_rss_hash_type *rss_type); + int (*ndo_hwtstamp_get)(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config); + int (*ndo_hwtstamp_set)(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); }; /** @@ -1708,6 +1645,9 @@ struct xdp_metadata_ops { * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN + * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests regardless of source, + * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1743,6 +1683,7 @@ enum netdev_priv_flags { IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), + IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1783,6 +1724,22 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + +enum netdev_reg_state { + NETREG_UNINITIALIZED = 0, + NETREG_REGISTERED, /* completed register_netdevice */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1844,6 +1801,7 @@ enum netdev_ml_priv_type { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. + * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -1930,8 +1888,7 @@ enum netdev_ml_priv_type { * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one - * @miniq_ingress: ingress/clsact qdisc specific data for - * ingress processing + * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing * @ingress_queue: XXX: need comments on this one * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address @@ -1952,8 +1909,7 @@ enum netdev_ml_priv_type { * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one - * @miniq_egress: clsact qdisc specific data for - * egress processing + * @tcx_egress: BPF & clsact qdisc specific data for egress processing * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by @@ -1979,10 +1935,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -1995,6 +1955,7 @@ enum netdev_ml_priv_type { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2045,6 +2006,8 @@ enum netdev_ml_priv_type { * receive offload (GRO) * @gro_ipv4_max_size: Maximum size of aggregated packet in generic * receive offload (GRO), for IPv4. + * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP + * zero copy driver * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. @@ -2058,11 +2021,86 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/net_device.rst. + * Please update the document when adding new fields. + */ + + /* TX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_tx); + unsigned long long priv_flags; + const struct net_device_ops *netdev_ops; + const struct header_ops *header_ops; + struct netdev_queue *_tx; + netdev_features_t gso_partial_features; + unsigned int real_num_tx_queues; + unsigned int gso_max_size; + unsigned int gso_ipv4_max_size; + u16 gso_max_segs; + s16 num_tc; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; +#ifdef CONFIG_XPS + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; +#endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_egress; +#endif + __cacheline_group_end(net_device_read_tx); + + /* TXRX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; + unsigned long state; + unsigned int flags; + unsigned short hard_header_len; + netdev_features_t features; + struct inet6_dev __rcu *ip6_ptr; + __cacheline_group_end(net_device_read_txrx); + + /* RX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_rx); + struct bpf_prog __rcu *xdp_prog; + struct list_head ptype_specific; + int ifindex; + unsigned int real_num_rx_queues; + struct netdev_rx_queue *_rx; + unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; + unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; + possible_net_t nd_net; +#ifdef CONFIG_NETPOLL + struct netpoll_info __rcu *npinfo; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_ingress; +#endif + __cacheline_group_end(net_device_read_rx); + char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; @@ -2080,14 +2118,12 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned long state; struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; - struct list_head ptype_specific; struct { struct list_head upper; @@ -2095,31 +2131,18 @@ struct net_device { } adj_list; /* Read-mostly cache-line for fast-path access */ - unsigned int flags; xdp_features_t xdp_features; - unsigned long long priv_flags; - const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; - int ifindex; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; unsigned short gflags; - unsigned short hard_header_len; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. - */ - unsigned int mtu; - unsigned short needed_headroom; unsigned short needed_tailroom; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; - netdev_features_t gso_partial_features; unsigned int min_mtu; unsigned int max_mtu; @@ -2157,9 +2180,7 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - const struct header_ops *header_ops; - - unsigned char operstate; + unsigned int operstate; unsigned char link_mode; unsigned char if_port; @@ -2199,9 +2220,7 @@ struct net_device { /* Protocol-specific pointers */ - struct in_device __rcu *ip_ptr; - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2236,26 +2255,13 @@ struct net_device { /* Interface address info used in eth_type_trans() */ const unsigned char *dev_addr; - struct netdev_rx_queue *_rx; unsigned int num_rx_queues; - unsigned int real_num_rx_queues; - - struct bpf_prog __rcu *xdp_prog; - unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GRO_MAX_SIZE (8 * 65535u) - unsigned int gro_max_size; - unsigned int gro_ipv4_max_size; - rx_handler_func_t __rcu *rx_handler; - void __rcu *rx_handler_data; - -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc __rcu *miniq_ingress; -#endif + unsigned int xdp_zc_max_segs; struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; @@ -2270,25 +2276,13 @@ struct net_device { /* * Cache lines mostly used on transmit path */ - struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; - unsigned int real_num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; -#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; -#endif -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc __rcu *miniq_egress; -#endif -#ifdef CONFIG_NETFILTER_EGRESS - struct nf_hook_entries __rcu *nf_hooks_egress; -#endif - #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif @@ -2309,13 +2303,7 @@ struct net_device { struct list_head link_watch_list; - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:8; + u8 reg_state; bool dismantle; @@ -2327,21 +2315,11 @@ struct net_device { bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); -#ifdef CONFIG_NETPOLL - struct netpoll_info __rcu *npinfo; -#endif - - possible_net_t nd_net; - /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; + enum netdev_stat_type pcpu_stat_type:8; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; @@ -2358,6 +2336,8 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u @@ -2366,20 +2346,15 @@ struct net_device { */ #define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) - unsigned int gso_max_size; #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; - u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; - unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - s16 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) @@ -2414,6 +2389,14 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin __rcu *dpll_pin; +#endif +#if IS_ENABLED(CONFIG_PAGE_POOL) + /** @page_pools: page pools created for this netdevice */ + struct hlist_head page_pools; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2618,6 +2601,15 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) +void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, + struct napi_struct *napi); + +static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) +{ + napi->irq = irq; +} + /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value */ @@ -2734,6 +2726,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; @@ -2994,8 +2996,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); -extern rwlock_t dev_base_lock; /* Device list lock */ - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ @@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) +#define for_each_netdev_dump(net, d, ifindex) \ + xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + static inline struct net_device *next_net_device(struct net_device *dev) { struct list_head *lh; @@ -3115,7 +3118,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -int init_dummy_netdev(struct net_device *dev); +void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3130,8 +3133,6 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int dev_restart(struct net_device *dev); - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -3418,6 +3419,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue @@ -3830,24 +3841,6 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); -static inline struct netdev_rx_queue * -__netif_get_rx_queue(struct net_device *dev, unsigned int rxq) -{ - return dev->_rx + rxq; -} - -#ifdef CONFIG_SYSFS -static inline unsigned int get_netdev_rx_queue_index( - struct netdev_rx_queue *queue) -{ - struct net_device *dev = queue->dev; - int index = queue - dev->_rx; - - BUG_ON(index >= dev->num_rx_queues); - return index; -} -#endif - int netif_get_num_default_rss_queues(void); void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -3895,7 +3888,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3908,8 +3901,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); -struct packet_offload *gro_find_receive_by_type(__be16 type); -struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { @@ -3933,6 +3924,14 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int generic_hwtstamp_get_lower(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_cfg); +int generic_hwtstamp_set_lower(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_cfg, + struct netlink_ext_ack *extack); +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, @@ -3961,6 +3960,7 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -4001,32 +4001,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -4166,6 +4153,15 @@ static inline void netdev_ref_replace(struct net_device *odev, void linkwatch_fire_event(struct net_device *dev); /** + * linkwatch_sync_dev - sync linkwatch for the given device + * @dev: network device to sync linkwatch for + * + * Sync linkwatch for the given device, removing it from the + * pending work list (if queued). + */ +void linkwatch_sync_dev(struct net_device *dev); + +/** * netif_carrier_ok - test if carrier present * @dev: network device * @@ -4274,8 +4270,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** @@ -4714,11 +4712,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int netdev_max_backlog; -extern int dev_rx_weight; -extern int dev_tx_weight; -extern int gro_normal_batch; - enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, @@ -5102,6 +5095,11 @@ static inline bool netif_is_ovs_port(const struct net_device *dev) return dev->priv_flags & IFF_OVS_DATAPATH; } +static inline bool netif_is_any_bridge_master(const struct net_device *dev) +{ + return netif_is_bridge_master(dev) || netif_is_ovs_master(dev); +} + static inline bool netif_is_any_bridge_port(const struct net_device *dev) { return netif_is_bridge_port(dev) || netif_is_ovs_port(dev); @@ -5170,7 +5168,9 @@ static inline const char *netdev_name(const struct net_device *dev) static inline const char *netdev_reg_state(const struct net_device *dev) { - switch (dev->reg_state) { + u8 reg_state = READ_ONCE(dev->reg_state); + + switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; @@ -5179,7 +5179,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev) case NETREG_DUMMY: return " (dummy)"; } - WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } @@ -5221,7 +5221,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev) #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) -extern struct list_head ptype_all __read_mostly; extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; @@ -5230,5 +5229,6 @@ extern struct net_device *blackhole_netdev; #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) +#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d4fed4c508ca..2683b2b77612 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -11,6 +11,7 @@ #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> +#include <linux/module.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> @@ -21,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict) return -(verdict >> NF_VERDICT_QBITS); } +static __always_inline int +NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) +{ + BUILD_BUG_ON(err > 0xffff); + + kfree_skb_reason(skb, reason); + + return ((err << 16) | NF_STOLEN); +} + static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { @@ -359,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include <net/flow.h> @@ -463,6 +473,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; @@ -481,6 +492,15 @@ struct nfnl_ct_hook { }; extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; +struct nf_defrag_hook { + struct module *owner; + int (*enable)(struct net *net); + void (*disable)(struct net *net); +}; + +extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; +extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; + /* * nf_skb_duplicated - TEE target has sent a packet * diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 9e937f64a1ad..81286c499325 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -34,10 +34,6 @@ struct nf_ct_h323_master { int get_h225_addr(struct nf_conn *ct, unsigned char *data, TransportAddress *taddr, union nf_inet_addr *addr, __be16 *port); -void nf_conntrack_h245_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); -void nf_conntrack_q931_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); struct nfct_h323_nat_hooks { int (*set_h245_addr)(struct sk_buff *skb, unsigned int protoff, diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index f33aa6021364..34ce5d2f37a2 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -25,7 +25,6 @@ struct nf_ct_gre_keymap { int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); -void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f980edfdd278..743475ca7e9d 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -42,7 +42,7 @@ static inline int nf_bridge_get_physinif(const struct sk_buff *skb) if (!nf_bridge) return 0; - return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; + return nf_bridge->physinif; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) @@ -56,11 +56,11 @@ static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) } static inline struct net_device * -nf_bridge_get_physindev(const struct sk_buff *skb) +nf_bridge_get_physindev(const struct sk_buff *skb, struct net *net) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - return nf_bridge ? nf_bridge->physindev : NULL; + return nf_bridge ? dev_get_by_index_rcu(net, nf_bridge->physinif) : NULL; } static inline struct net_device * diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 7834c0be2831..61aa48f46dd7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -51,7 +51,7 @@ struct nf_ipv6_ops { u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie); + const struct tcphdr *th); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, @@ -179,16 +179,16 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie) + const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) - return v6_ops->cookie_v6_check(iph, th, cookie); + return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) - return __cookie_v6_check(iph, th, cookie); + return __cookie_v6_check(iph, th); #endif #endif return 0; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b11a84f6c32b..100cbb261269 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page) return folio_wait_private_2_killable(page_folio(page)); } +/* Marks used on xarray-based buffers */ +#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ +#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ + enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, NETFS_INVALID_READ, + NETFS_UPLOAD_TO_SERVER, + NETFS_WRITE_TO_CACHE, + NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, @@ -129,9 +136,57 @@ struct netfs_inode { struct fscache_cookie *cache; #endif loff_t remote_i_size; /* Size of the remote file */ + loff_t zero_point; /* Size after which we assume there's no data + * on the server */ + unsigned long flags; +#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ +#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ +#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ +}; + +/* + * A netfs group - for instance a ceph snap. This is marked on dirty pages and + * pages marked with a group must be flushed before they can be written under + * the domain of another group. + */ +struct netfs_group { + refcount_t ref; + void (*free)(struct netfs_group *netfs_group); }; /* + * Information about a dirty page (attached only if necessary). + * folio->private + */ +struct netfs_folio { + struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */ + unsigned int dirty_offset; /* Write-streaming dirty data offset */ + unsigned int dirty_len; /* Write-streaming dirty data length */ +}; +#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ + +static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +{ + void *priv = folio_get_private(folio); + + if ((unsigned long)priv & NETFS_FOLIO_INFO) + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); + return NULL; +} + +static inline struct netfs_group *netfs_folio_group(struct folio *folio) +{ + struct netfs_folio *finfo; + void *priv = folio_get_private(folio); + + finfo = netfs_folio_info(folio); + if (finfo) + return finfo->netfs_group; + return priv; +} + +/* * Resources required to do operations on a cache. */ struct netfs_cache_resources { @@ -143,17 +198,24 @@ struct netfs_cache_resources { }; /* - * Descriptor for a single component subrequest. + * Descriptor for a single component subrequest. Each operation represents an + * individual read/write from/to a server, a cache, a journal, etc.. + * + * The buffer iterator is persistent for the life of the subrequest struct and + * the pages it points to can be relied on to exist for the duration. */ struct netfs_io_subrequest { struct netfs_io_request *rreq; /* Supervising I/O request */ + struct work_struct work; struct list_head rreq_link; /* Link in rreq->subrequests */ + struct iov_iter io_iter; /* Iterator for this subrequest */ loff_t start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ + unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ @@ -168,6 +230,13 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ + NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ + NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ + NETFS_DIO_READ, /* This is a direct I/O read */ + NETFS_DIO_WRITE, /* This is a direct I/O write */ + nr__netfs_io_origin } __mode(byte); /* @@ -175,19 +244,34 @@ enum netfs_io_origin { * operations to a variety of data stores and then stitch the result together. */ struct netfs_io_request { - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ + struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ + struct iov_iter iter; /* Unencrypted-side iterator */ + struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ + struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + unsigned int subreq_counter; /* Next subreq->debug_index */ atomic_t nr_outstanding; /* Number of ops in progress */ atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ + size_t upper_len; /* Length can be extended to here */ + size_t transferred; /* Amount to be indicated as transferred */ short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ @@ -199,17 +283,25 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ +#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ +#define NETFS_RREQ_BLOCKED 10 /* We blocked */ const struct netfs_request_ops *netfs_ops; + void (*cleanup)(struct netfs_io_request *req); }; /* * Operations the network filesystem can/must provide to the helpers. */ struct netfs_request_ops { + unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ + unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ int (*init_request)(struct netfs_io_request *rreq, struct file *file); void (*free_request)(struct netfs_io_request *rreq); - int (*begin_cache_operation)(struct netfs_io_request *rreq); + void (*free_subrequest)(struct netfs_io_subrequest *rreq); + /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); @@ -217,6 +309,14 @@ struct netfs_request_ops { int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); + + /* Modification handling */ + void (*update_i_size)(struct inode *inode, loff_t i_size); + + /* Write request handling */ + void (*create_write_requests)(struct netfs_io_request *wreq, + loff_t start, size_t len); + void (*invalidate_cache)(struct netfs_io_request *wreq); }; /* @@ -229,8 +329,7 @@ enum netfs_read_from_hole { }; /* - * Table of operations for access to a cache. This is obtained by - * rreq->ops->begin_cache_operation(). + * Table of operations for access to a cache. */ struct netfs_cache_ops { /* End an operation */ @@ -265,8 +364,8 @@ struct netfs_cache_ops { * actually do. */ int (*prepare_write)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size, - bool no_space_allocated_yet); + loff_t *_start, size_t *_len, size_t upper_len, + loff_t i_size, bool no_space_allocated_yet); /* Prepare an on-demand read operation, shortening it to a cached/uncached * boundary as appropriate. @@ -284,22 +383,62 @@ struct netfs_cache_ops { loff_t *_data_start, size_t *_data_len); }; +/* High-level read API. */ +ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); + +/* High-level write API */ +ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group); +ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, + struct netfs_group *netfs_group); +ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from); +ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); + +/* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); int netfs_write_begin(struct netfs_inode *, struct file *, - struct address_space *, loff_t pos, unsigned int len, - struct folio **, void **fsdata); - + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); +int netfs_writepages(struct address_space *mapping, + struct writeback_control *wbc); +bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio); +int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); +void netfs_clear_inode_writeback(struct inode *inode, const void *aux); +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); +bool netfs_release_folio(struct folio *folio, gfp_t gfp); +int netfs_launder_folio(struct folio *folio); + +/* VMA operations API. */ +vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); + +/* (Sub)request management API. */ void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); -void netfs_stats_show(struct seq_file *); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); +size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs); +struct netfs_io_subrequest *netfs_create_write_request( + struct netfs_io_request *wreq, enum netfs_io_source dest, + loff_t start, size_t len, work_func_t worker); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, + bool was_async); +void netfs_queue_write_request(struct netfs_io_subrequest *subreq); + +int netfs_start_io_read(struct inode *inode); +void netfs_end_io_read(struct inode *inode); +int netfs_start_io_write(struct inode *inode); +void netfs_end_io_write(struct inode *inode); +int netfs_start_io_direct(struct inode *inode); +void netfs_end_io_direct(struct inode *inode); /** * netfs_inode - Get the netfs inode context from the inode @@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise * @ops: The netfs's operations list + * @use_zero_point: True to use the zero_point read optimisation * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. */ static inline void netfs_inode_init(struct netfs_inode *ctx, - const struct netfs_request_ops *ops) + const struct netfs_request_ops *ops, + bool use_zero_point) { ctx->ops = ops; ctx->remote_i_size = i_size_read(&ctx->inode); + ctx->zero_point = LLONG_MAX; + ctx->flags = 0; #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif + /* ->releasepage() drives zero_point */ + if (use_zero_point) { + ctx->zero_point = ctx->remote_i_size; + mapping_set_release_always(ctx->inode.i_mapping); + } } /** * netfs_resize_file - Note that a file got resized * @ctx: The netfs inode being resized * @new_i_size: The new file size + * @changed_on_server: The change was applied to the server * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) +static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, + bool changed_on_server) { - ctx->remote_i_size = new_i_size; + if (changed_on_server) + ctx->remote_i_size = new_i_size; + if (new_i_size < ctx->zero_point) + ctx->zero_point = new_i_size; } /** diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9eec3f4f5351..5df7340d4dab 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -50,6 +50,7 @@ struct netlink_kernel_cfg { struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); + void (*release) (struct sock *sk, unsigned long *groups); }; struct sock *__netlink_kernel_create(struct net *net, int unit, @@ -227,6 +228,13 @@ bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); + +typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data); + +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, + __u32 portid, __u32 group, gfp_t allocation, + netlink_filter_fn filter, + void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); @@ -283,6 +291,7 @@ struct netlink_callback { u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; + int flags; bool strict_check; union { u8 ctx[48]; @@ -315,6 +324,7 @@ struct netlink_dump_control { void *data; struct module *module; u32 min_dump_alloc; + int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -345,5 +355,6 @@ bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap); +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 730003c4f4af..ef8d2d618d5b 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -150,7 +150,7 @@ enum nfs_opnum4 { OP_WRITE_SAME = 70, OP_CLONE = 71, - /* xattr support (RFC8726) */ + /* xattr support (RFC8276) */ OP_GETXATTR = 72, OP_SETXATTR = 73, OP_LISTXATTRS = 74, @@ -389,79 +389,203 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +/* + * Symbol names and values are from RFC 7531 Section 2. + * "XDR Description of NFSv4.0" + */ +enum { + FATTR4_SUPPORTED_ATTRS = 0, + FATTR4_TYPE = 1, + FATTR4_FH_EXPIRE_TYPE = 2, + FATTR4_CHANGE = 3, + FATTR4_SIZE = 4, + FATTR4_LINK_SUPPORT = 5, + FATTR4_SYMLINK_SUPPORT = 6, + FATTR4_NAMED_ATTR = 7, + FATTR4_FSID = 8, + FATTR4_UNIQUE_HANDLES = 9, + FATTR4_LEASE_TIME = 10, + FATTR4_RDATTR_ERROR = 11, + FATTR4_ACL = 12, + FATTR4_ACLSUPPORT = 13, + FATTR4_ARCHIVE = 14, + FATTR4_CANSETTIME = 15, + FATTR4_CASE_INSENSITIVE = 16, + FATTR4_CASE_PRESERVING = 17, + FATTR4_CHOWN_RESTRICTED = 18, + FATTR4_FILEHANDLE = 19, + FATTR4_FILEID = 20, + FATTR4_FILES_AVAIL = 21, + FATTR4_FILES_FREE = 22, + FATTR4_FILES_TOTAL = 23, + FATTR4_FS_LOCATIONS = 24, + FATTR4_HIDDEN = 25, + FATTR4_HOMOGENEOUS = 26, + FATTR4_MAXFILESIZE = 27, + FATTR4_MAXLINK = 28, + FATTR4_MAXNAME = 29, + FATTR4_MAXREAD = 30, + FATTR4_MAXWRITE = 31, + FATTR4_MIMETYPE = 32, + FATTR4_MODE = 33, + FATTR4_NO_TRUNC = 34, + FATTR4_NUMLINKS = 35, + FATTR4_OWNER = 36, + FATTR4_OWNER_GROUP = 37, + FATTR4_QUOTA_AVAIL_HARD = 38, + FATTR4_QUOTA_AVAIL_SOFT = 39, + FATTR4_QUOTA_USED = 40, + FATTR4_RAWDEV = 41, + FATTR4_SPACE_AVAIL = 42, + FATTR4_SPACE_FREE = 43, + FATTR4_SPACE_TOTAL = 44, + FATTR4_SPACE_USED = 45, + FATTR4_SYSTEM = 46, + FATTR4_TIME_ACCESS = 47, + FATTR4_TIME_ACCESS_SET = 48, + FATTR4_TIME_BACKUP = 49, + FATTR4_TIME_CREATE = 50, + FATTR4_TIME_DELTA = 51, + FATTR4_TIME_METADATA = 52, + FATTR4_TIME_MODIFY = 53, + FATTR4_TIME_MODIFY_SET = 54, + FATTR4_MOUNTED_ON_FILEID = 55, +}; + +/* + * Symbol names and values are from RFC 5662 Section 2. + * "XDR Description of NFSv4.1" + */ +enum { + FATTR4_DIR_NOTIF_DELAY = 56, + FATTR4_DIRENT_NOTIF_DELAY = 57, + FATTR4_DACL = 58, + FATTR4_SACL = 59, + FATTR4_CHANGE_POLICY = 60, + FATTR4_FS_STATUS = 61, + FATTR4_FS_LAYOUT_TYPES = 62, + FATTR4_LAYOUT_HINT = 63, + FATTR4_LAYOUT_TYPES = 64, + FATTR4_LAYOUT_BLKSIZE = 65, + FATTR4_LAYOUT_ALIGNMENT = 66, + FATTR4_FS_LOCATIONS_INFO = 67, + FATTR4_MDSTHRESHOLD = 68, + FATTR4_RETENTION_GET = 69, + FATTR4_RETENTION_SET = 70, + FATTR4_RETENTEVT_GET = 71, + FATTR4_RETENTEVT_SET = 72, + FATTR4_RETENTION_HOLD = 73, + FATTR4_MODE_SET_MASKED = 74, + FATTR4_SUPPATTR_EXCLCREAT = 75, + FATTR4_FS_CHARSET_CAP = 76, +}; + +/* + * Symbol names and values are from RFC 7863 Section 2. + * "XDR Description of NFSv4.2" + */ +enum { + FATTR4_CLONE_BLKSIZE = 77, + FATTR4_SPACE_FREED = 78, + FATTR4_CHANGE_ATTR_TYPE = 79, + FATTR4_SEC_LABEL = 80, +}; + +/* + * Symbol names and values are from RFC 8275 Section 5. + * "The mode_umask Attribute" + */ +enum { + FATTR4_MODE_UMASK = 81, +}; + +/* + * Symbol names and values are from RFC 8276 Section 8.6. + * "Numeric Values Assigned to Protocol Extensions" + */ +enum { + FATTR4_XATTR_SUPPORT = 82, +}; + +/* + * The following internal definitions enable processing the above + * attribute bits within 32-bit word boundaries. + */ /* Mandatory Attributes */ -#define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) -#define FATTR4_WORD0_TYPE (1UL << 1) -#define FATTR4_WORD0_FH_EXPIRE_TYPE (1UL << 2) -#define FATTR4_WORD0_CHANGE (1UL << 3) -#define FATTR4_WORD0_SIZE (1UL << 4) -#define FATTR4_WORD0_LINK_SUPPORT (1UL << 5) -#define FATTR4_WORD0_SYMLINK_SUPPORT (1UL << 6) -#define FATTR4_WORD0_NAMED_ATTR (1UL << 7) -#define FATTR4_WORD0_FSID (1UL << 8) -#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9) -#define FATTR4_WORD0_LEASE_TIME (1UL << 10) -#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11) +#define FATTR4_WORD0_SUPPORTED_ATTRS BIT(FATTR4_SUPPORTED_ATTRS) +#define FATTR4_WORD0_TYPE BIT(FATTR4_TYPE) +#define FATTR4_WORD0_FH_EXPIRE_TYPE BIT(FATTR4_FH_EXPIRE_TYPE) +#define FATTR4_WORD0_CHANGE BIT(FATTR4_CHANGE) +#define FATTR4_WORD0_SIZE BIT(FATTR4_SIZE) +#define FATTR4_WORD0_LINK_SUPPORT BIT(FATTR4_LINK_SUPPORT) +#define FATTR4_WORD0_SYMLINK_SUPPORT BIT(FATTR4_SYMLINK_SUPPORT) +#define FATTR4_WORD0_NAMED_ATTR BIT(FATTR4_NAMED_ATTR) +#define FATTR4_WORD0_FSID BIT(FATTR4_FSID) +#define FATTR4_WORD0_UNIQUE_HANDLES BIT(FATTR4_UNIQUE_HANDLES) +#define FATTR4_WORD0_LEASE_TIME BIT(FATTR4_LEASE_TIME) +#define FATTR4_WORD0_RDATTR_ERROR BIT(FATTR4_RDATTR_ERROR) /* Mandatory in NFSv4.1 */ -#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11) +#define FATTR4_WORD2_SUPPATTR_EXCLCREAT BIT(FATTR4_SUPPATTR_EXCLCREAT - 64) /* Recommended Attributes */ -#define FATTR4_WORD0_ACL (1UL << 12) -#define FATTR4_WORD0_ACLSUPPORT (1UL << 13) -#define FATTR4_WORD0_ARCHIVE (1UL << 14) -#define FATTR4_WORD0_CANSETTIME (1UL << 15) -#define FATTR4_WORD0_CASE_INSENSITIVE (1UL << 16) -#define FATTR4_WORD0_CASE_PRESERVING (1UL << 17) -#define FATTR4_WORD0_CHOWN_RESTRICTED (1UL << 18) -#define FATTR4_WORD0_FILEHANDLE (1UL << 19) -#define FATTR4_WORD0_FILEID (1UL << 20) -#define FATTR4_WORD0_FILES_AVAIL (1UL << 21) -#define FATTR4_WORD0_FILES_FREE (1UL << 22) -#define FATTR4_WORD0_FILES_TOTAL (1UL << 23) -#define FATTR4_WORD0_FS_LOCATIONS (1UL << 24) -#define FATTR4_WORD0_HIDDEN (1UL << 25) -#define FATTR4_WORD0_HOMOGENEOUS (1UL << 26) -#define FATTR4_WORD0_MAXFILESIZE (1UL << 27) -#define FATTR4_WORD0_MAXLINK (1UL << 28) -#define FATTR4_WORD0_MAXNAME (1UL << 29) -#define FATTR4_WORD0_MAXREAD (1UL << 30) -#define FATTR4_WORD0_MAXWRITE (1UL << 31) -#define FATTR4_WORD1_MIMETYPE (1UL << 0) -#define FATTR4_WORD1_MODE (1UL << 1) -#define FATTR4_WORD1_NO_TRUNC (1UL << 2) -#define FATTR4_WORD1_NUMLINKS (1UL << 3) -#define FATTR4_WORD1_OWNER (1UL << 4) -#define FATTR4_WORD1_OWNER_GROUP (1UL << 5) -#define FATTR4_WORD1_QUOTA_HARD (1UL << 6) -#define FATTR4_WORD1_QUOTA_SOFT (1UL << 7) -#define FATTR4_WORD1_QUOTA_USED (1UL << 8) -#define FATTR4_WORD1_RAWDEV (1UL << 9) -#define FATTR4_WORD1_SPACE_AVAIL (1UL << 10) -#define FATTR4_WORD1_SPACE_FREE (1UL << 11) -#define FATTR4_WORD1_SPACE_TOTAL (1UL << 12) -#define FATTR4_WORD1_SPACE_USED (1UL << 13) -#define FATTR4_WORD1_SYSTEM (1UL << 14) -#define FATTR4_WORD1_TIME_ACCESS (1UL << 15) -#define FATTR4_WORD1_TIME_ACCESS_SET (1UL << 16) -#define FATTR4_WORD1_TIME_BACKUP (1UL << 17) -#define FATTR4_WORD1_TIME_CREATE (1UL << 18) -#define FATTR4_WORD1_TIME_DELTA (1UL << 19) -#define FATTR4_WORD1_TIME_METADATA (1UL << 20) -#define FATTR4_WORD1_TIME_MODIFY (1UL << 21) -#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) -#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) -#define FATTR4_WORD1_DACL (1UL << 26) -#define FATTR4_WORD1_SACL (1UL << 27) -#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) -#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) -#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) -#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) -#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) -#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -#define FATTR4_WORD2_MODE_UMASK (1UL << 17) -#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) +#define FATTR4_WORD0_ACL BIT(FATTR4_ACL) +#define FATTR4_WORD0_ACLSUPPORT BIT(FATTR4_ACLSUPPORT) +#define FATTR4_WORD0_ARCHIVE BIT(FATTR4_ARCHIVE) +#define FATTR4_WORD0_CANSETTIME BIT(FATTR4_CANSETTIME) +#define FATTR4_WORD0_CASE_INSENSITIVE BIT(FATTR4_CASE_INSENSITIVE) +#define FATTR4_WORD0_CASE_PRESERVING BIT(FATTR4_CASE_PRESERVING) +#define FATTR4_WORD0_CHOWN_RESTRICTED BIT(FATTR4_CHOWN_RESTRICTED) +#define FATTR4_WORD0_FILEHANDLE BIT(FATTR4_FILEHANDLE) +#define FATTR4_WORD0_FILEID BIT(FATTR4_FILEID) +#define FATTR4_WORD0_FILES_AVAIL BIT(FATTR4_FILES_AVAIL) +#define FATTR4_WORD0_FILES_FREE BIT(FATTR4_FILES_FREE) +#define FATTR4_WORD0_FILES_TOTAL BIT(FATTR4_FILES_TOTAL) +#define FATTR4_WORD0_FS_LOCATIONS BIT(FATTR4_FS_LOCATIONS) +#define FATTR4_WORD0_HIDDEN BIT(FATTR4_HIDDEN) +#define FATTR4_WORD0_HOMOGENEOUS BIT(FATTR4_HOMOGENEOUS) +#define FATTR4_WORD0_MAXFILESIZE BIT(FATTR4_MAXFILESIZE) +#define FATTR4_WORD0_MAXLINK BIT(FATTR4_MAXLINK) +#define FATTR4_WORD0_MAXNAME BIT(FATTR4_MAXNAME) +#define FATTR4_WORD0_MAXREAD BIT(FATTR4_MAXREAD) +#define FATTR4_WORD0_MAXWRITE BIT(FATTR4_MAXWRITE) + +#define FATTR4_WORD1_MIMETYPE BIT(FATTR4_MIMETYPE - 32) +#define FATTR4_WORD1_MODE BIT(FATTR4_MODE - 32) +#define FATTR4_WORD1_NO_TRUNC BIT(FATTR4_NO_TRUNC - 32) +#define FATTR4_WORD1_NUMLINKS BIT(FATTR4_NUMLINKS - 32) +#define FATTR4_WORD1_OWNER BIT(FATTR4_OWNER - 32) +#define FATTR4_WORD1_OWNER_GROUP BIT(FATTR4_OWNER_GROUP - 32) +#define FATTR4_WORD1_QUOTA_HARD BIT(FATTR4_QUOTA_AVAIL_HARD - 32) +#define FATTR4_WORD1_QUOTA_SOFT BIT(FATTR4_QUOTA_AVAIL_SOFT - 32) +#define FATTR4_WORD1_QUOTA_USED BIT(FATTR4_QUOTA_USED - 32) +#define FATTR4_WORD1_RAWDEV BIT(FATTR4_RAWDEV - 32) +#define FATTR4_WORD1_SPACE_AVAIL BIT(FATTR4_SPACE_AVAIL - 32) +#define FATTR4_WORD1_SPACE_FREE BIT(FATTR4_SPACE_FREE - 32) +#define FATTR4_WORD1_SPACE_TOTAL BIT(FATTR4_SPACE_TOTAL - 32) +#define FATTR4_WORD1_SPACE_USED BIT(FATTR4_SPACE_USED - 32) +#define FATTR4_WORD1_SYSTEM BIT(FATTR4_SYSTEM - 32) +#define FATTR4_WORD1_TIME_ACCESS BIT(FATTR4_TIME_ACCESS - 32) +#define FATTR4_WORD1_TIME_ACCESS_SET BIT(FATTR4_TIME_ACCESS_SET - 32) +#define FATTR4_WORD1_TIME_BACKUP BIT(FATTR4_TIME_BACKUP - 32) +#define FATTR4_WORD1_TIME_CREATE BIT(FATTR4_TIME_CREATE - 32) +#define FATTR4_WORD1_TIME_DELTA BIT(FATTR4_TIME_DELTA - 32) +#define FATTR4_WORD1_TIME_METADATA BIT(FATTR4_TIME_METADATA - 32) +#define FATTR4_WORD1_TIME_MODIFY BIT(FATTR4_TIME_MODIFY - 32) +#define FATTR4_WORD1_TIME_MODIFY_SET BIT(FATTR4_TIME_MODIFY_SET - 32) +#define FATTR4_WORD1_MOUNTED_ON_FILEID BIT(FATTR4_MOUNTED_ON_FILEID - 32) +#define FATTR4_WORD1_DACL BIT(FATTR4_DACL - 32) +#define FATTR4_WORD1_SACL BIT(FATTR4_SACL - 32) +#define FATTR4_WORD1_FS_LAYOUT_TYPES BIT(FATTR4_FS_LAYOUT_TYPES - 32) + +#define FATTR4_WORD2_LAYOUT_TYPES BIT(FATTR4_LAYOUT_TYPES - 64) +#define FATTR4_WORD2_LAYOUT_BLKSIZE BIT(FATTR4_LAYOUT_BLKSIZE - 64) +#define FATTR4_WORD2_MDSTHRESHOLD BIT(FATTR4_MDSTHRESHOLD - 64) +#define FATTR4_WORD2_CLONE_BLKSIZE BIT(FATTR4_CLONE_BLKSIZE - 64) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE BIT(FATTR4_CHANGE_ATTR_TYPE - 64) +#define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) +#define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) +#define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) @@ -745,4 +869,26 @@ enum { RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, }; +enum nfs_cb_opnum4 { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + + /* Callback operations new to NFSv4.1 */ + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, + + /* Callback operations new to NFSv4.2 */ + OP_CB_OFFLOAD = 15, + + OP_CB_ILLEGAL = 10044, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 279262057a92..d59116ac8209 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -595,7 +595,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); * linux/fs/nfs/write.c */ extern int nfs_congestion_kb; -extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct folio *folio); extern int nfs_update_folio(struct file *file, struct folio *folio, @@ -612,6 +611,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..92de074e63b9 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -123,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -238,6 +240,7 @@ struct nfs_server { struct list_head delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) @@ -263,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 12bbb5c63664..d09b9773b20c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1772,7 +1772,7 @@ struct nfs_rpc_ops { void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct page *, + int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); @@ -1821,13 +1821,6 @@ struct nfs_rpc_ops { }; /* - * NFS_CALL(getattr, inode, (fattr)); - * into - * NFS_PROTO(inode)->getattr(fattr); - */ -#define NFS_CALL(op, inode, args) NFS_PROTO(inode)->op args - -/* * Function vectors etc. for the NFS client */ extern const struct nfs_rpc_ops nfs_v2_clientops; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e3e6a64b98e0..f53438eae815 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void) #ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { - arch_trigger_cpumask_backtrace(cpu_online_mask, false); + arch_trigger_cpumask_backtrace(cpu_online_mask, -1); return true; } -static inline bool trigger_allbutself_cpu_backtrace(void) +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu) { - arch_trigger_cpumask_backtrace(cpu_online_mask, true); + arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu); return true; } static inline bool trigger_cpumask_backtrace(struct cpumask *mask) { - arch_trigger_cpumask_backtrace(mask, false); + arch_trigger_cpumask_backtrace(mask, -1); return true; } static inline bool trigger_single_cpu_backtrace(int cpu) { - arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); + arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1); return true; } /* generic implementation */ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self, + int exclude_cpu, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); @@ -190,7 +190,7 @@ static inline bool trigger_all_cpu_backtrace(void) { return false; } -static inline bool trigger_allbutself_cpu_backtrace(void) +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu) { return false; } @@ -216,13 +216,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -struct ctl_table; -int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); - #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif diff --git a/include/linux/node.h b/include/linux/node.h index 427a5975cf40..dfc004e4bee7 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -20,20 +20,32 @@ #include <linux/list.h> /** - * struct node_hmem_attrs - heterogeneous memory performance attributes + * struct access_coordinate - generic performance coordinates container * * @read_bandwidth: Read bandwidth in MB/s * @write_bandwidth: Write bandwidth in MB/s * @read_latency: Read latency in nanoseconds * @write_latency: Write latency in nanoseconds */ -struct node_hmem_attrs { +struct access_coordinate { unsigned int read_bandwidth; unsigned int write_bandwidth; unsigned int read_latency; unsigned int write_latency; }; +/* + * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0 + * - access_coordinate between target node and nearest initiator node + * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1 + * - access_coordinate between target node and nearest CPU node + */ +enum access_coordinate_class { + ACCESS_COORDINATE_LOCAL, + ACCESS_COORDINATE_CPU, + ACCESS_COORDINATE_MAX +}; + enum cache_indexing { NODE_CACHE_DIRECT_MAP, NODE_CACHE_INDEXED, @@ -65,8 +77,8 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); -void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, - unsigned access); +void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -74,8 +86,8 @@ static inline void node_add_cache(unsigned int nid, } static inline void node_set_perf_attrs(unsigned int nid, - struct node_hmem_attrs *hmem_attrs, - unsigned access) + struct access_coordinate *coord, + enum access_coordinate_class access) { } #endif @@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned access); + enum access_coordinate_class access); #else static inline void node_dev_init(void) { diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 8d07116caaf1..b61438313a73 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -93,10 +93,10 @@ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> +#include <linux/nodemask_types.h> #include <linux/numa.h> #include <linux/random.h> -typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; /** diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h new file mode 100644 index 000000000000..6b28d97ea6ed --- /dev/null +++ b/include/linux/nodemask_types.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NODEMASK_TYPES_H +#define __LINUX_NODEMASK_TYPES_H + +#include <linux/bitops.h> +#include <linux/numa.h> + +typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; + +#endif /* __LINUX_NODEMASK_TYPES_H */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 86544707236a..45702bdcbceb 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -73,9 +73,7 @@ struct raw_notifier_head { struct srcu_notifier_head { struct mutex mutex; -#ifdef CONFIG_TREE_SRCU struct srcu_usage srcuu; -#endif struct srcu_struct srcu; struct notifier_block __rcu *head; }; @@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } -#ifdef CONFIG_TREE_SRCU #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ @@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } -#else -#define SRCU_NOTIFIER_INIT(name, pcpu) \ - { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .head = NULL, \ - .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ - } -#endif #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 0f1d024bd958..7d22ea50b098 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -7,7 +7,7 @@ struct proc_ns_operations; struct ns_common { - atomic_long_t stashed; + struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index fee881cded01..5601d14e2886 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -2,6 +2,7 @@ #ifndef _LINUX_NSPROXY_H #define _LINUX_NSPROXY_H +#include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/sched.h> @@ -29,7 +30,7 @@ struct fs_struct; * nsproxy is copied. */ struct nsproxy { - atomic_t count; + refcount_t count; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; @@ -102,14 +103,13 @@ int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) { - if (atomic_dec_and_test(&ns->count)) { + if (refcount_dec_and_test(&ns->count)) free_nsproxy(ns); - } } static inline void get_nsproxy(struct nsproxy *ns) { - atomic_inc(&ns->count); + refcount_inc(&ns->count); } #endif diff --git a/include/linux/nubus.h b/include/linux/nubus.h index bdcd85e622d8..4d103ac8f5c7 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -89,8 +89,6 @@ struct nubus_driver { void (*remove)(struct nubus_board *board); }; -extern struct bus_type nubus_bus_type; - /* Generic NuBus interface functions, modelled after the PCI interface */ #ifdef CONFIG_PROC_FS extern bool nubus_populate_procfs; diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..915033a75731 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NUMA_H #define _LINUX_NUMA_H +#include <linux/init.h> #include <linux/types.h> #ifdef CONFIG_NODES_SHIFT @@ -12,6 +13,7 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) +#define NUMA_NO_MEMBLK (-1) /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO @@ -21,33 +23,32 @@ #endif #ifdef CONFIG_NUMA -#include <linux/printk.h> #include <asm/sparsemem.h> /* Generic implementation available */ -int numa_map_to_online_node(int node); +int numa_nearest_node(int node, unsigned int state); #ifndef memory_add_physaddr_to_nid -static inline int memory_add_physaddr_to_nid(u64 start) -{ - pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} +int memory_add_physaddr_to_nid(u64 start); #endif + #ifndef phys_to_target_node -static inline int phys_to_target_node(u64 start) +int phys_to_target_node(u64 start); +#endif + +#ifndef numa_fill_memblks +static inline int __init numa_fill_memblks(u64 start, u64 end) { - pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", - start); - return 0; + return NUMA_NO_MEMBLK; } #endif + #else /* !CONFIG_NUMA */ -static inline int numa_map_to_online_node(int node) +static inline int numa_nearest_node(int node, unsigned int state) { return NUMA_NO_NODE; } + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; @@ -58,6 +59,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) + #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP extern const struct attribute_group arch_node_dev_group; #endif diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index dcb8030062dd..c1d0bc5d9624 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -9,9 +9,9 @@ #include <crypto/kpp.h> struct nvme_dhchap_key { - u8 *key; size_t len; u8 hash; + u8 key[]; }; u32 nvme_auth_get_seqnum(void); @@ -24,10 +24,13 @@ const char *nvme_auth_digest_name(u8 hmac_id); size_t nvme_auth_hmac_hash_len(u8 hmac_id); u8 nvme_auth_hmac_id(const char *hmac_name); +u32 nvme_auth_key_struct_size(u32 key_len); struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, u8 key_hash); void nvme_auth_free_key(struct nvme_dhchap_key *key); -u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn); +struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash); +struct nvme_dhchap_key *nvme_auth_transform_key( + struct nvme_dhchap_key *key, char *nqn); int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h new file mode 100644 index 000000000000..e10333d78dbb --- /dev/null +++ b/include/linux/nvme-keyring.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#ifndef _NVME_KEYRING_H +#define _NVME_KEYRING_H + +#if IS_ENABLED(CONFIG_NVME_KEYRING) + +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn); + +key_serial_t nvme_keyring_id(void); + +#else + +static inline key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + return 0; +} +static inline key_serial_t nvme_keyring_id(void) +{ + return 0; +} +#endif /* !CONFIG_NVME_KEYRING */ +#endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 4dd7e6fe92fb..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,7 +6,11 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_RDMA_MAX_QUEUE_SIZE 256 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..e07e8978d691 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -18,6 +18,12 @@ enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, }; +enum nvme_tcp_tls_cipher { + NVME_TCP_TLS_CIPHER_INVALID = 0, + NVME_TCP_TLS_CIPHER_SHA256 = 1, + NVME_TCP_TLS_CIPHER_SHA384 = 2, +}; + enum nvme_tcp_fatal_error_status { NVME_TCP_FES_INVALID_PDU_HDR = 0x01, NVME_TCP_FES_PDU_SEQ_ERR = 0x02, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..425573202295 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -20,12 +20,9 @@ #define NVMF_TRSVCID_SIZE 32 #define NVMF_TRADDR_SIZE 256 #define NVMF_TSAS_SIZE 256 -#define NVMF_AUTH_HASH_LEN 64 #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -#define NVME_RDMA_IP_PORT 4420 - #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { @@ -108,6 +105,13 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; +/* TSAS SECTYPE for TCP transport */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ +}; + #define NVME_AQ_DEPTH 32 #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) @@ -640,6 +644,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), @@ -810,12 +815,6 @@ struct nvme_reservation_status_ext { struct nvme_registered_ctrl_ext regctl_eds[]; }; -enum nvme_async_event_type { - NVME_AER_TYPE_ERROR = 0, - NVME_AER_TYPE_SMART = 1, - NVME_AER_TYPE_NOTICE = 2, -}; - /* I/O commands */ enum nvme_opcode { @@ -1493,6 +1492,9 @@ struct nvmf_disc_rsp_page_entry { __u16 pkey; __u8 resv10[246]; } rdma; + struct tcp { + __u8 sectype; + } tcp; } tsas; }; @@ -1722,7 +1724,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; @@ -1809,7 +1811,7 @@ struct nvme_command { }; }; -static inline bool nvme_is_fabrics(struct nvme_command *cmd) +static inline bool nvme_is_fabrics(const struct nvme_command *cmd) { return cmd->common.opcode == nvme_fabrics_command; } @@ -1828,7 +1830,7 @@ struct nvme_error_slot { __u8 resv2[24]; }; -static inline bool nvme_is_write(struct nvme_command *cmd) +static inline bool nvme_is_write(const struct nvme_command *cmd) { /* * What a mess... diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index fa030d93b768..34c0e58dfa26 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -43,6 +43,8 @@ enum { NVMEM_REMOVE, NVMEM_CELL_ADD, NVMEM_CELL_REMOVE, + NVMEM_LAYOUT_ADD, + NVMEM_LAYOUT_REMOVE, }; #if IS_ENABLED(CONFIG_NVMEM) @@ -79,6 +81,7 @@ int nvmem_device_cell_write(struct nvmem_device *nvmem, struct nvmem_cell_info *info, void *buf); const char *nvmem_dev_name(struct nvmem_device *nvmem); +size_t nvmem_dev_size(struct nvmem_device *nvmem); void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries); @@ -125,6 +128,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u8(struct device *dev, + const char *cell_id, u8 *val) +{ + return -EOPNOTSUPP; +} + static inline int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val) { @@ -239,7 +248,6 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id); struct nvmem_device *of_nvmem_device_get(struct device_node *np, const char *name); -struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem); #else static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id) @@ -252,12 +260,6 @@ static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np, { return ERR_PTR(-EOPNOTSUPP); } - -static inline struct device_node * -of_nvmem_layout_get_container(struct nvmem_device *nvmem) -{ - return ERR_PTR(-EOPNOTSUPP); -} #endif /* CONFIG_NVMEM && CONFIG_OF */ #endif /* ifndef _LINUX_NVMEM_CONSUMER_H */ diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index dae26295e6be..f0ba0e03218f 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -9,6 +9,7 @@ #ifndef _LINUX_NVMEM_PROVIDER_H #define _LINUX_NVMEM_PROVIDER_H +#include <linux/device.h> #include <linux/device/driver.h> #include <linux/err.h> #include <linux/errno.h> @@ -82,13 +83,15 @@ struct nvmem_cell_info { * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @add_legacy_fixed_of_cells: Read fixed NVMEM cells from old OF syntax. + * @fixup_dt_cell_info: Will be called before a cell is added. Can be + * used to modify the nvmem_cell_info. * @keepout: Optional array of keepout ranges (sorted ascending by start). * @nkeepout: Number of elements in the keepout array. * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @of_node: If given, this will be used instead of the parent's of_node. - * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. * @size: Device size. @@ -112,6 +115,9 @@ struct nvmem_config { struct module *owner; const struct nvmem_cell_info *cells; int ncells; + bool add_legacy_fixed_of_cells; + void (*fixup_dt_cell_info)(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell); const struct nvmem_keepout *keepout; unsigned int nkeepout; enum nvmem_type type; @@ -120,7 +126,6 @@ struct nvmem_config { bool ignore_wp; struct nvmem_layout *layout; struct device_node *of_node; - bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; int size; @@ -154,15 +159,11 @@ struct nvmem_cell_table { /** * struct nvmem_layout - NVMEM layout definitions * - * @name: Layout name. - * @of_match_table: Open firmware match table. + * @dev: Device-model layout device. + * @nvmem: The underlying NVMEM device * @add_cells: Will be called if a nvmem device is found which * has this layout. The function will add layout * specific cells with nvmem_add_one_cell(). - * @fixup_cell_info: Will be called before a cell is added. Can be - * used to modify the nvmem_cell_info. - * @owner: Pointer to struct module. - * @node: List node. * * A nvmem device can hold a well defined structure which can just be * evaluated during runtime. For example a TLV list, or a list of "name=val" @@ -170,17 +171,15 @@ struct nvmem_cell_table { * cells. */ struct nvmem_layout { - const char *name; - const struct of_device_id *of_match_table; - int (*add_cells)(struct device *dev, struct nvmem_device *nvmem, - struct nvmem_layout *layout); - void (*fixup_cell_info)(struct nvmem_device *nvmem, - struct nvmem_layout *layout, - struct nvmem_cell_info *cell); - - /* private */ - struct module *owner; - struct list_head node; + struct device dev; + struct nvmem_device *nvmem; + int (*add_cells)(struct nvmem_layout *layout); +}; + +struct nvmem_layout_driver { + struct device_driver driver; + int (*probe)(struct nvmem_layout *layout); + void (*remove)(struct nvmem_layout *layout); }; #if IS_ENABLED(CONFIG_NVMEM) @@ -197,13 +196,14 @@ void nvmem_del_cell_table(struct nvmem_cell_table *table); int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info); -int __nvmem_layout_register(struct nvmem_layout *layout, struct module *owner); -#define nvmem_layout_register(layout) \ - __nvmem_layout_register(layout, THIS_MODULE) +int nvmem_layout_register(struct nvmem_layout *layout); void nvmem_layout_unregister(struct nvmem_layout *layout); -const void *nvmem_layout_get_match_data(struct nvmem_device *nvmem, - struct nvmem_layout *layout); +int nvmem_layout_driver_register(struct nvmem_layout_driver *drv); +void nvmem_layout_driver_unregister(struct nvmem_layout_driver *drv); +#define module_nvmem_layout_driver(__nvmem_layout_driver) \ + module_driver(__nvmem_layout_driver, nvmem_layout_driver_register, \ + nvmem_layout_driver_unregister) #else @@ -235,17 +235,27 @@ static inline int nvmem_layout_register(struct nvmem_layout *layout) static inline void nvmem_layout_unregister(struct nvmem_layout *layout) {} -static inline const void * -nvmem_layout_get_match_data(struct nvmem_device *nvmem, - struct nvmem_layout *layout) +#endif /* CONFIG_NVMEM */ + +#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) + +/** + * of_nvmem_layout_get_container() - Get OF node of layout container + * + * @nvmem: nvmem device + * + * Return: a node pointer with refcount incremented or NULL if no + * container exists. Use of_node_put() on it when done. + */ +struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem); + +#else /* CONFIG_NVMEM && CONFIG_OF */ + +static inline struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem) { return NULL; } -#endif /* CONFIG_NVMEM */ - -#define module_nvmem_layout_driver(__layout_driver) \ - module_driver(__layout_driver, nvmem_layout_register, \ - nvmem_layout_unregister) +#endif /* CONFIG_NVMEM && CONFIG_OF */ #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ diff --git a/include/linux/objpool.h b/include/linux/objpool.h new file mode 100644 index 000000000000..15aff4a17f0c --- /dev/null +++ b/include/linux/objpool.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_OBJPOOL_H +#define _LINUX_OBJPOOL_H + +#include <linux/types.h> +#include <linux/refcount.h> + +/* + * objpool: ring-array based lockless MPMC queue + * + * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org + * + * objpool is a scalable implementation of high performance queue for + * object allocation and reclamation, such as kretprobe instances. + * + * With leveraging percpu ring-array to mitigate hot spots of memory + * contention, it delivers near-linear scalability for high parallel + * scenarios. The objpool is best suited for the following cases: + * 1) Memory allocation or reclamation are prohibited or too expensive + * 2) Consumers are of different priorities, such as irqs and threads + * + * Limitations: + * 1) Maximum objects (capacity) is fixed after objpool creation + * 2) All pre-allocated objects are managed in percpu ring array, + * which consumes more memory than linked lists + */ + +/** + * struct objpool_slot - percpu ring array of objpool + * @head: head sequence of the local ring array (to retrieve at) + * @tail: tail sequence of the local ring array (to append at) + * @last: the last sequence number marked as ready for retrieve + * @mask: bits mask for modulo capacity to compute array indexes + * @entries: object entries on this slot + * + * Represents a cpu-local array-based ring buffer, its size is specialized + * during initialization of object pool. The percpu objpool node is to be + * allocated from local memory for NUMA system, and to be kept compact in + * continuous memory: CPU assigned number of objects are stored just after + * the body of objpool_node. + * + * Real size of the ring array is far too smaller than the value range of + * head and tail, typed as uint32_t: [0, 2^32), so only lower bits (mask) + * of head and tail are used as the actual position in the ring array. In + * general the ring array is acting like a small sliding window, which is + * always moving forward in the loop of [0, 2^32). + */ +struct objpool_slot { + uint32_t head; + uint32_t tail; + uint32_t last; + uint32_t mask; + void *entries[]; +} __packed; + +struct objpool_head; + +/* + * caller-specified callback for object initial setup, it's only called + * once for each object (just after the memory allocation of the object) + */ +typedef int (*objpool_init_obj_cb)(void *obj, void *context); + +/* caller-specified cleanup callback for objpool destruction */ +typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); + +/** + * struct objpool_head - object pooling metadata + * @obj_size: object size, aligned to sizeof(void *) + * @nr_objs: total objs (to be pre-allocated with objpool) + * @nr_cpus: local copy of nr_cpu_ids + * @capacity: max objs can be managed by one objpool_slot + * @gfp: gfp flags for kmalloc & vmalloc + * @ref: refcount of objpool + * @flags: flags for objpool management + * @cpu_slots: pointer to the array of objpool_slot + * @release: resource cleanup callback + * @context: caller-provided context + */ +struct objpool_head { + int obj_size; + int nr_objs; + int nr_cpus; + int capacity; + gfp_t gfp; + refcount_t ref; + unsigned long flags; + struct objpool_slot **cpu_slots; + objpool_fini_cb release; + void *context; +}; + +#define OBJPOOL_NR_OBJECT_MAX (1UL << 24) /* maximum numbers of total objects */ +#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */ + +/** + * objpool_init() - initialize objpool and pre-allocated objects + * @pool: the object pool to be initialized, declared by caller + * @nr_objs: total objects to be pre-allocated by this object pool + * @object_size: size of an object (should be > 0) + * @gfp: flags for memory allocation (via kmalloc or vmalloc) + * @context: user context for object initialization callback + * @objinit: object initialization callback for extra setup + * @release: cleanup callback for extra cleanup task + * + * return value: 0 for success, otherwise error code + * + * All pre-allocated objects are to be zeroed after memory allocation. + * Caller could do extra initialization in objinit callback. objinit() + * will be called just after slot allocation and called only once for + * each object. After that the objpool won't touch any content of the + * objects. It's caller's duty to perform reinitialization after each + * pop (object allocation) or do clearance before each push (object + * reclamation). + */ +int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, + gfp_t gfp, void *context, objpool_init_obj_cb objinit, + objpool_fini_cb release); + +/** + * objpool_pop() - allocate an object from objpool + * @pool: object pool + * + * return value: object ptr or NULL if failed + */ +void *objpool_pop(struct objpool_head *pool); + +/** + * objpool_push() - reclaim the object and return back to objpool + * @obj: object ptr to be pushed to objpool + * @pool: object pool + * + * return: 0 or error code (it fails only when user tries to push + * the same object multiple times or wrong "objects" into objpool) + */ +int objpool_push(void *obj, struct objpool_head *pool); + +/** + * objpool_drop() - discard the object and deref objpool + * @obj: object ptr to be discarded + * @pool: object pool + * + * return: 0 if objpool was released; -EAGAIN if there are still + * outstanding objects + * + * objpool_drop is normally for the release of outstanding objects + * after objpool cleanup (objpool_fini). Thinking of this example: + * kretprobe is unregistered and objpool_fini() is called to release + * all remained objects, but there are still objects being used by + * unfinished kretprobes (like blockable function: sys_accept). So + * only when the last outstanding object is dropped could the whole + * objpool be released along with the call of objpool_drop() + */ +int objpool_drop(void *obj, struct objpool_head *pool); + +/** + * objpool_free() - release objpool forcely (all objects to be freed) + * @pool: object pool to be released + */ +void objpool_free(struct objpool_head *pool); + +/** + * objpool_fini() - deref object pool (also releasing unused objects) + * @pool: object pool to be dereferenced + * + * objpool_fini() will try to release all remained free objects and + * then drop an extra reference of the objpool. If all objects are + * already returned to objpool (so called synchronous use cases), + * the objpool itself will be freed together. But if there are still + * outstanding objects (so called asynchronous use cases, such like + * blockable kretprobe), the objpool won't be released until all + * the outstanding objects are dropped, but the caller must assure + * there are no concurrent objpool_push() on the fly. Normally RCU + * is being required to make sure all ongoing objpool_push() must + * be finished before calling objpool_fini(), so does test_objpool, + * kretprobe or rethook + */ +void objpool_fini(struct objpool_head *pool); + +#endif /* _LINUX_OBJPOOL_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 03f82c2c2ebf..b3b8d3dab52d 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -130,7 +130,8 @@ * it will be ignored. */ .macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY) +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . @@ -141,7 +142,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm diff --git a/include/linux/of.h b/include/linux/of.h index 6ecde0515677..a0bedd038a05 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -13,6 +13,7 @@ */ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/errno.h> #include <linux/kobject.h> #include <linux/mod_devicetable.h> @@ -96,10 +97,12 @@ struct of_reconfig_data { struct property *old_prop; }; +extern const struct kobj_type of_node_ktype; +extern const struct fwnode_operations of_fwnode_ops; + /** * of_node_init - initialize a devicetree node * @node: Pointer to device node that has been created by kzalloc() - * @phandle_name: Name of property holding a phandle value * * On return the device_node refcount is set to one. Use of_node_put() * on @node when done to free the memory allocated for it. If the node @@ -107,9 +110,6 @@ struct of_reconfig_data { * whether to free the memory will be done by node->release(), which is * of_node_release(). */ -/* initialize a node */ -extern const struct kobj_type of_node_ktype; -extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { #if defined(CONFIG_OF_KOBJ) @@ -135,6 +135,7 @@ static inline struct device_node *of_node_get(struct device_node *node) } static inline void of_node_put(struct device_node *node) { } #endif /* !CONFIG_OF_DYNAMIC */ +DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T)) /* Pointer for first entry in chain of all nodes. */ extern struct device_node *of_root; @@ -181,11 +182,6 @@ static inline bool is_of_node(const struct fwnode_handle *fwnode) &__of_fwnode_handle_node->fwnode : NULL; \ }) -static inline bool of_have_populated_dt(void) -{ - return of_root != NULL; -} - static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); @@ -295,6 +291,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible); @@ -363,9 +361,6 @@ extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); -#define for_each_property_of_node(dn, pp) \ - for (pp = dn->properties; pp != NULL; pp = pp->next) - extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -403,7 +398,20 @@ extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); -extern int of_machine_is_compatible(const char *compat); +bool of_machine_compatible_match(const char *const *compats); + +/** + * of_machine_is_compatible - Test root of device tree for a given compatible value + * @compat: compatible string to look for in root node's compatible property. + * + * Return: true if the root node has the given value in its compatible property. + */ +static inline bool of_machine_is_compatible(const char *compat) +{ + const char *compats[] = { compat, NULL }; + + return of_machine_compatible_match(compats); +} extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); @@ -542,6 +550,12 @@ static inline struct device_node *of_get_next_available_child( return NULL; } +static inline struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev) +{ + return NULL; +} + static inline struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name) { @@ -550,11 +564,6 @@ static inline struct device_node *of_find_node_with_property( #define of_fwnode_handle(node) NULL -static inline bool of_have_populated_dt(void) -{ - return false; -} - static inline struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { @@ -809,6 +818,11 @@ static inline int of_remove_property(struct device_node *np, struct property *pr return 0; } +static inline bool of_machine_compatible_match(const char *const *compats) +{ + return false; +} + static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; @@ -893,6 +907,9 @@ static inline int of_prop_val_eq(struct property *p1, struct property *p2) !memcmp(p1->value, p2->value, (size_t)p1->length); } +#define for_each_property_of_node(dn, pp) \ + for (pp = dn->properties; pp != NULL; pp = pp->next) + #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else @@ -1067,6 +1084,22 @@ static inline int of_parse_phandle_with_optional_args(const struct device_node * } /** + * of_phandle_args_equal() - Compare two of_phandle_args + * @a1: First of_phandle_args to compare + * @a2: Second of_phandle_args to compare + * + * Return: True if a1 and a2 are the same (same node pointer, same phandle + * args), false otherwise. + */ +static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, + const struct of_phandle_args *a2) +{ + return a1->np == a2->np && + a1->args_count == a2->args_count && + !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count); +} + +/** * of_property_count_u8_elems - Count the number of u8 elements in a property * * @np: device node from which the property value is to be read. @@ -1429,9 +1462,25 @@ static inline int of_property_read_s32(const struct device_node *np, #define for_each_child_of_node(parent, child) \ for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) + +#define for_each_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_child(parent, child)) + #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) +#define for_each_reserved_child_of_node(parent, child) \ + for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \ + child = of_get_next_reserved_child(parent, child)) + +#define for_each_available_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_available_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_available_child(parent, child)) #define for_each_of_cpu_node(cpu) \ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \ @@ -1580,6 +1629,29 @@ static inline int of_changeset_update_property(struct of_changeset *ocs, { return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); } + +struct device_node *of_changeset_create_node(struct of_changeset *ocs, + struct device_node *parent, + const char *full_name); +int of_changeset_add_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str); +int of_changeset_add_prop_string_array(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const char **str_array, size_t sz); +int of_changeset_add_prop_u32_array(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const u32 *array, size_t sz); +static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, + const u32 val) +{ + return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); +} + #else /* CONFIG_OF_DYNAMIC */ static inline int of_reconfig_notifier_register(struct notifier_block *nb) { @@ -1612,6 +1684,21 @@ static inline bool of_device_is_system_power_controller(const struct device_node return of_property_read_bool(np, "system-power-controller"); } +/** + * of_have_populated_dt() - Has DT been populated by bootloader + * + * Return: True if a DTB has been populated by the bootloader and it isn't the + * empty builtin one. False otherwise. + */ +static inline bool of_have_populated_dt(void) +{ +#ifdef CONFIG_OF + return of_property_present(of_root, "compatible"); +#else + return false; +#endif +} + /* * Overlay support */ @@ -1645,7 +1732,7 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id); + int *ovcs_id, struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1654,8 +1741,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id) +static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, + int *ovcs_id, struct device_node *target_base) { return -ENOTSUPP; } diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 2c7a3d4bc775..9042bca5bb84 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -2,10 +2,7 @@ #ifndef _LINUX_OF_DEVICE_H #define _LINUX_OF_DEVICE_H -#include <linux/platform_device.h> -#include <linux/of_platform.h> /* temporary until merge */ - -#include <linux/of.h> +#include <linux/device/driver.h> struct device; struct of_device_id; @@ -40,6 +37,9 @@ static inline int of_dma_configure(struct device *dev, { return of_dma_configure_id(dev, np, force_dma, NULL); } + +void of_device_make_bus_id(struct device *dev); + #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -82,6 +82,9 @@ static inline int of_dma_configure(struct device *dev, { return 0; } + +static inline void of_device_make_bus_id(struct device *dev) {} + #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 4d7756087b6b..a4bea62bfa29 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -41,7 +41,7 @@ struct of_endpoint { bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); -int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_endpoint_count(const struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); @@ -68,7 +68,7 @@ static inline int of_graph_parse_endpoint(const struct device_node *node, return -ENOSYS; } -static inline int of_graph_get_endpoint_count(const struct device_node *np) +static inline unsigned int of_graph_get_endpoint_count(const struct device_node *np) { return 0; } diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 9a5e6b410dd2..e61cbbe12dac 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -8,20 +8,19 @@ struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id); +extern int of_iommu_configure(struct device *dev, struct device_node *master_np, + const u32 *id); extern void of_iommu_get_resv_regions(struct device *dev, struct list_head *list); #else -static inline const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np, - const u32 *id) +static inline int of_iommu_configure(struct device *dev, + struct device_node *master_np, + const u32 *id) { - return NULL; + return -ENODEV; } static inline void of_iommu_get_resv_regions(struct device *dev, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index d8045bcfc35e..a2ff1ad48f7f 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -7,11 +7,11 @@ */ #include <linux/mod_devicetable.h> -#include <linux/of_device.h> -#include <linux/platform_device.h> struct device; +struct device_node; struct of_device_id; +struct platform_device; /** * struct of_dev_auxdata - lookup table entry for device names & platform_data @@ -127,10 +127,4 @@ static inline int devm_of_platform_populate(struct device *dev) static inline void devm_of_platform_depopulate(struct device *dev) { } #endif -#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS) -extern void of_platform_register_reconfig_notifier(void); -#else -static inline void of_platform_register_reconfig_notifier(void) { } -#endif - #endif /* _LINUX_OF_PLATFORM_H */ diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 0f4a8903922a..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -30,9 +30,6 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ - OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */ - OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */ - OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ @@ -49,11 +46,6 @@ enum OID { OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */ - /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */ - OID_md2, /* 1.2.840.113549.2.2 */ - OID_md4, /* 1.2.840.113549.2.4 */ - OID_md5, /* 1.2.840.113549.2.5 */ - OID_mskrb5, /* 1.2.840.48018.1.2.2 */ OID_krb5, /* 1.2.840.113554.1.2.2 */ OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */ @@ -67,6 +59,7 @@ enum OID { OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ OID_ntlmssp, /* 1.3.6.1.4.1.311.2.2.10 */ + OID_negoex, /* 1.3.6.1.4.1.311.2.2.30 */ OID_spnego, /* 1.3.6.1.5.5.2 */ @@ -140,6 +133,17 @@ enum OID { OID_TPMImportableKey, /* 2.23.133.10.1.4 */ OID_TPMSealedData, /* 2.23.133.10.1.5 */ + /* CSOR FIPS-202 SHA-3 */ + OID_sha3_256, /* 2.16.840.1.101.3.4.2.8 */ + OID_sha3_384, /* 2.16.840.1.101.3.4.2.9 */ + OID_sha3_512, /* 2.16.840.1.101.3.4.2.10 */ + OID_id_ecdsa_with_sha3_256, /* 2.16.840.1.101.3.4.3.10 */ + OID_id_ecdsa_with_sha3_384, /* 2.16.840.1.101.3.4.3.11 */ + OID_id_ecdsa_with_sha3_512, /* 2.16.840.1.101.3.4.3.12 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_256, /* 2.16.840.1.101.3.4.3.14 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_384, /* 2.16.840.1.101.3.4.3.15 */ + OID_id_rsassa_pkcs1_v1_5_with_sha3_512, /* 2.16.840.1.101.3.4.3.16 */ + OID__NR }; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 5581dbd3bd34..ea8fb31379e3 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -6,11 +6,6 @@ * An MCS like lock especially tailored for optimistic spinning for sleeping * lock implementations (mutex, rwsem, etc). */ -struct optimistic_spin_node { - struct optimistic_spin_node *next, *prev; - int locked; /* 1 if lock acquired */ - int cpu; /* encoded CPU # + 1 value */ -}; struct optimistic_spin_queue { /* diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f9b60313eaea..0c7e3dcfe867 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -31,8 +31,10 @@ * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_max(t) __type_max(typeof(t)) +#define __type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, @@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow) * @b: second addend * @d: pointer to store sum * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted addition, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * sum has overflowed or been truncated. + * *@d holds the results of the attempted addition, regardless of whether + * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** + * wrapping_add() - Intentionally perform a wrapping addition + * @type: type for result of calculation + * @a: first addend + * @b: second addend + * + * Return the potentially wrapped-around addition without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_add(type, a, b) \ + ({ \ + type __val; \ + __builtin_add_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_add() - Intentionally perform a wrapping increment assignment + * @var: variable to be incremented + * @offset: amount to add + * + * Increments @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_add(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ + }) + +/** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted subtraction, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * difference has underflowed or been truncated. + * *@d holds the results of the attempted subtraction, regardless of whether + * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** + * wrapping_sub() - Intentionally perform a wrapping subtraction + * @type: type for result of calculation + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * + * Return the potentially wrapped-around subtraction without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_sub(type, a, b) \ + ({ \ + type __val; \ + __builtin_sub_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign + * @var: variable to be decremented + * @offset: amount to subtract + * + * Decrements @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_sub(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ + }) + +/** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted multiplication, but is not - * considered "safe for use" on a non-zero return value, which indicates - * that the product has overflowed or been truncated. + * *@d holds the results of the attempted multiplication, regardless of whether + * wrap-around occurred. */ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** + * wrapping_mul() - Intentionally perform a wrapping multiplication + * @type: type for result of calculation + * @a: first factor + * @b: second factor + * + * Return the potentially wrapped-around multiplication without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_mul(type, a, b) \ + ({ \ + type __val; \ + __builtin_mul_overflow(a, b, &__val); \ + __val; \ + }) + +/** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift @@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ - u64 _a_full = _a; \ + unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ @@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ - (x) > type_max(typeof(T)) : \ + (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ - (x) < 0 || (x) > type_max(typeof(T)) : \ - (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + (x) < 0 || (x) > type_max(T) : \ + (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ @@ -309,4 +388,56 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: initializer expression (could be empty for no init). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + _Static_assert(__builtin_constant_p(count), \ + "onstack flex array members require compile-time const count"); \ + union { \ + u8 bytes[struct_size_t(type, member, count)]; \ + type obj; \ + } name##_u initializer; \ + type *name = (type *)&name##_u + +/** + * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member, when it does not have a __counted_by annotation. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @type structure with a trailing + * flexible array member. + * Use __struct_size(@name) to get compile-time size of it afterwards. + */ +#define DEFINE_RAW_FLEX(type, name, member, count) \ + _DEFINE_FLEX(type, name, member, count, = {}) + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @TYPE: structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @MEMBER: Name of the array member. + * @COUNTER: Name of the __counted_by member. + * @COUNT: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @TYPE structure with a trailing + * flexible array member. + * Use __struct_size(@NAME) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/padata.h b/include/linux/padata.h index 495b16b6b4d7..0146daf34430 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -137,6 +137,7 @@ struct padata_shell { * appropriate for one worker thread to do at once. * @max_threads: Max threads to use for the job, actual number may be less * depending on task size and minimum chunk size. + * @numa_aware: Distribute jobs to different nodes with CPU in a round robin fashion. */ struct padata_mt_job { void (*thread_fn)(unsigned long start, unsigned long end, void *arg); @@ -146,6 +147,7 @@ struct padata_mt_job { unsigned long align; unsigned long min_chunk; int max_threads; + bool numa_aware; }; /** @@ -178,10 +180,6 @@ struct padata_instance { #ifdef CONFIG_PADATA extern void __init padata_init(void); -#else -static inline void __init padata_init(void) {} -#endif - extern struct padata_instance *padata_alloc(const char *name); extern void padata_free(struct padata_instance *pinst); extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); @@ -192,4 +190,12 @@ extern void padata_do_serial(struct padata_priv *padata); extern void __init padata_do_multithreaded(struct padata_mt_job *job); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); +#else +static inline void __init padata_init(void) {} +static inline void __init padata_do_multithreaded(struct padata_mt_job *job) +{ + job->thread_fn(job->start, job->start + job->size, job->fn_arg); +} +#endif + #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 92a2063a0a23..4bf1c25fd1dc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,13 +99,15 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ + PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, + PG_head, /* Must be in bit 6 */ + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_error, PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ @@ -113,8 +115,6 @@ enum pageflags { PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_writeback, /* Page is under writeback */ - PG_head, /* A head page */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ @@ -171,15 +171,6 @@ enum pageflags { /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, -#ifdef CONFIG_MEMORY_FAILURE - /* - * Compound pages. Stored in first tail page's flags. - * Indicates that at least one subpage is hwpoisoned in the - * THP. - */ - PG_has_hwpoisoned = PG_error, -#endif - /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -190,6 +181,16 @@ enum pageflags { /* For self-hosted memmap pages */ PG_vmemmap_self_hosted = PG_owner_priv_1, #endif + + /* + * Flags only valid for compound pages. Stored in first tail page's + * flags word. Cannot use the first 8 flags or any flag marked as + * PF_ANY. + */ + + /* At least one page in this folio has the hwpoison flag set */ + PG_has_hwpoisoned = PG_error, + PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -235,7 +236,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page) } #endif -static __always_inline int page_is_fake_head(struct page *page) +static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } @@ -279,12 +280,12 @@ static inline unsigned long _compound_head(const struct page *page) */ #define folio_page(folio, n) nth_page(&(folio)->page, n) -static __always_inline int PageTail(struct page *page) +static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } -static __always_inline int PageCompound(struct page *page) +static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags) || READ_ONCE(page->compound_head) & 1; @@ -304,6 +305,16 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static const unsigned long *const_folio_flags(const struct folio *folio, + unsigned n) +{ + const struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; @@ -326,9 +337,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) * for compound page all operations related to the page flag applied to * head page. * - * PF_ONLY_HEAD: - * for compound page, callers only ever operate on the head page. - * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. @@ -344,9 +352,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) -#define PF_ONLY_HEAD(page, enforce) ({ \ - VM_BUG_ON_PGFLAGS(PageTail(page), page); \ - PF_POISONED_CHECK(page); }) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) @@ -360,59 +365,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 -#define FOLIO_PF_ONLY_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 +#define FOLIO_HEAD_PAGE 0 +#define FOLIO_SECOND_PAGE 1 + /* * Macros to create function definitions for page flags */ +#define FOLIO_TEST_FLAG(name, page) \ +static __always_inline bool folio_test_##name(const struct folio *folio) \ +{ return test_bit(PG_##name, const_folio_flags(folio, page)); } + +#define FOLIO_SET_FLAG(name, page) \ +static __always_inline void folio_set_##name(struct folio *folio) \ +{ set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void folio_clear_##name(struct folio *folio) \ +{ clear_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_SET_FLAG(name, page) \ +static __always_inline void __folio_set_##name(struct folio *folio) \ +{ __set_bit(PG_##name, folio_flags(folio, page)); } + +#define __FOLIO_CLEAR_FLAG(name, page) \ +static __always_inline void __folio_clear_##name(struct folio *folio) \ +{ __clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_SET_FLAG(name, page) \ +static __always_inline bool folio_test_set_##name(struct folio *folio) \ +{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_TEST_CLEAR_FLAG(name, page) \ +static __always_inline bool folio_test_clear_##name(struct folio *folio) \ +{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } + +#define FOLIO_FLAG(name, page) \ +FOLIO_TEST_FLAG(name, page) \ +FOLIO_SET_FLAG(name, page) \ +FOLIO_CLEAR_FLAG(name, page) + #define TESTPAGEFLAG(uname, lname, policy) \ -static __always_inline bool folio_test_##lname(struct folio *folio) \ -{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ -static __always_inline int Page##uname(struct page *page) \ +FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ +static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_set_##lname(struct folio *folio) \ -{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void folio_clear_##lname(struct folio *folio) \ -{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_set_##lname(struct folio *folio) \ -{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ -static __always_inline \ -void __folio_clear_##lname(struct folio *folio) \ -{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_set_##lname(struct folio *folio) \ -{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ -static __always_inline \ -bool folio_test_clear_##lname(struct folio *folio) \ -{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ +FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } @@ -430,30 +457,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) +#define FOLIO_TEST_FLAG_FALSE(name) \ +static inline bool folio_test_##name(const struct folio *folio) \ +{ return false; } +#define FOLIO_SET_FLAG_NOOP(name) \ +static inline void folio_set_##name(struct folio *folio) { } +#define FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void folio_clear_##name(struct folio *folio) { } +#define __FOLIO_SET_FLAG_NOOP(name) \ +static inline void __folio_set_##name(struct folio *folio) { } +#define __FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void __folio_clear_##name(struct folio *folio) { } +#define FOLIO_TEST_SET_FLAG_FALSE(name) \ +static inline bool folio_test_set_##name(struct folio *folio) \ +{ return false; } +#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ +static inline bool folio_test_clear_##name(struct folio *folio) \ +{ return false; } + +#define FOLIO_FLAG_FALSE(name) \ +FOLIO_TEST_FLAG_FALSE(name) \ +FOLIO_SET_FLAG_NOOP(name) \ +FOLIO_CLEAR_FLAG_NOOP(name) + #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ +FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_set_##lname(struct folio *folio) { } \ +FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_clear_##lname(struct folio *folio) { } \ +FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void __folio_clear_##lname(struct folio *folio) { } \ +__FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ -static inline bool folio_test_set_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ -static inline bool folio_test_clear_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ @@ -463,7 +511,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) @@ -530,13 +578,13 @@ PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline bool folio_test_swapcache(struct folio *folio) +static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && - test_bit(PG_swapcache, folio_flags(folio, 0)); + test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(struct page *page) +static __always_inline bool PageSwapCache(const struct page *page) { return folio_test_swapcache(page_folio(page)); } @@ -581,10 +629,10 @@ PAGEFLAG_FALSE(HWPoison, hwpoison) #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) -TESTPAGEFLAG(Young, young, PF_ANY) -SETPAGEFLAG(Young, young, PF_ANY) -TESTCLEARFLAG(Young, young, PF_ANY) -PAGEFLAG(Idle, idle, PF_ANY) +FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) +FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) #endif /* @@ -635,22 +683,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) */ #define PAGE_MAPPING_DAX_SHARED ((void *)0x1) -static __always_inline bool folio_mapping_flags(struct folio *folio) +static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageMappingFlags(struct page *page) +static __always_inline int PageMappingFlags(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline bool folio_test_anon(struct folio *folio) +static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } -static __always_inline bool PageAnon(struct page *page) +static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } @@ -661,7 +709,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio) PAGE_MAPPING_MOVABLE; } -static __always_inline int __PageMovable(struct page *page) +static __always_inline int __PageMovable(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; @@ -674,13 +722,13 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline bool folio_test_ksm(struct folio *folio) +static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } -static __always_inline bool PageKsm(struct page *page) +static __always_inline bool PageKsm(const struct page *page) { return folio_test_ksm(page_folio(page)); } @@ -691,6 +739,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); /** + * folio_xor_flags_has_waiters - Change some folio flags. + * @folio: The folio. + * @mask: Bits set in this word will be changed. + * + * This must only be used for flags which are changed with the folio + * lock held. For example, it is unsafe to use for PG_dirty as that + * can be set without the folio lock held. It can also only be used + * on flags which are in the range 0-6 as some of the implementations + * only affect those bits. + * + * Return: Whether there are tasks waiting on the folio. + */ +static inline bool folio_xor_flags_has_waiters(struct folio *folio, + unsigned long mask) +{ + return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); +} + +/** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * @@ -700,9 +767,9 @@ u64 stable_page_flags(struct page *page); * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ -static inline bool folio_test_uptodate(struct folio *folio) +static inline bool folio_test_uptodate(const struct folio *folio) { - bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); + bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. @@ -717,7 +784,7 @@ static inline bool folio_test_uptodate(struct folio *folio) return ret; } -static inline int PageUptodate(struct page *page) +static inline int PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } @@ -751,25 +818,20 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -bool __folio_start_writeback(struct folio *folio, bool keep_write); -bool set_page_writeback(struct page *page); +void __folio_start_writeback(struct folio *folio, bool keep_write); +void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) -static inline bool test_set_page_writeback(struct page *page) -{ - return set_page_writeback(page); -} - -static __always_inline bool folio_test_head(struct folio *folio) +static __always_inline bool folio_test_head(const struct folio *folio) { - return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); + return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } -static __always_inline int PageHead(struct page *page) +static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); @@ -785,7 +847,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY) * * Return: True if the folio is larger than one page. */ -static inline bool folio_test_large(struct folio *folio) +static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } @@ -806,17 +868,13 @@ static inline void ClearPageCompound(struct page *page) BUG_ON(!PageHead(page)); ClearPageHead(page); } +PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND) +#else +TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #endif #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(struct page *page); -bool folio_test_hugetlb(struct folio *folio); -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -826,23 +884,18 @@ TESTPAGEFLAG_FALSE(Huge, hugetlb) * hugetlbfs pages, but not normal pages. PageTransHuge() can only be * called only in the core VM paths where hugetlbfs pages can't exist. */ -static inline int PageTransHuge(struct page *page) +static inline int PageTransHuge(const struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); } -static inline bool folio_test_transhuge(struct folio *folio) -{ - return folio_test_head(folio); -} - /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransCompound(struct page *page) +static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } @@ -852,7 +905,7 @@ static inline int PageTransCompound(struct page *page) * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ -static inline int PageTransTail(struct page *page) +static inline int PageTransTail(const struct page *page) { return PageTail(page); } @@ -878,49 +931,57 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - -/* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of page_mapcount() won't be + * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ #define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of page_mapcount */ +/* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +#define folio_test_type(folio, flag) \ + ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { return (int)page_type < PAGE_MAPCOUNT_RESERVE; } -static inline int page_has_type(struct page *page) +static inline int page_has_type(const struct page *page) { return page_type_has_type(page->page_type); } -#define PAGE_TYPE_OPS(uname, lname) \ -static __always_inline int Page##uname(struct page *page) \ +#define FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline bool folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type &= ~PG_##lname; \ +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname; \ +} + +#define PAGE_TYPE_OPS(uname, lname, fname) \ +FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ @@ -939,7 +1000,7 @@ static __always_inline void __ClearPage##uname(struct page *page) \ * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ -PAGE_TYPE_OPS(Buddy, buddy) +PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the @@ -963,7 +1024,7 @@ PAGE_TYPE_OPS(Buddy, buddy) * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ -PAGE_TYPE_OPS(Offline, offline) +PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); @@ -973,18 +1034,49 @@ extern void page_offline_end(void); /* * Marks pages in use as page tables. */ -PAGE_TYPE_OPS(Table, table) +PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc. */ -PAGE_TYPE_OPS(Guard, guard) +PAGE_TYPE_OPS(Guard, guard, guard) + +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); -static __always_inline int PageAnonExclusive(struct page *page) +static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); @@ -1040,6 +1132,14 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) #define PAGE_FLAGS_CHECK_AT_PREP \ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) +/* + * Flags stored in the second page of a compound page. They may overlap + * the CHECK_AT_FREE flags above, so need to be cleared. + */ +#define PAGE_FLAGS_SECOND \ + (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ + 1UL << PG_large_rmappable) + #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** @@ -1049,19 +1149,18 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) * Determine if a page has private stuff, indicating that release routines * should be invoked upon it. */ -static inline int page_has_private(struct page *page) +static inline int page_has_private(const struct page *page) { return !!(page->flags & PAGE_FLAGS_PRIVATE); } -static inline bool folio_has_private(struct folio *folio) +static inline bool folio_has_private(const struct folio *folio) { return page_has_private(&folio->page); } #undef PF_ANY #undef PF_HEAD -#undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index c141ea9a95ef..8cd858d912c4 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -4,7 +4,7 @@ #include <linux/atomic.h> #include <linux/cache.h> -#include <linux/kernel.h> +#include <linux/limits.h> #include <asm/page.h> struct page_counter { diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 67314f648aeb..be98564191e6 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -8,6 +8,7 @@ struct pglist_data; +#ifdef CONFIG_PAGE_EXTENSION /** * struct page_ext_operations - per page_ext client operations * @offset: Offset to the client's data within page_ext. Offset is returned to @@ -29,8 +30,6 @@ struct page_ext_operations { bool need_shared_flags; }; -#ifdef CONFIG_PAGE_EXTENSION - /* * The page_ext_flags users must set need_shared_flags to true. */ @@ -82,6 +81,12 @@ static inline void page_ext_init(void) extern struct page_ext *page_ext_get(struct page *page); extern void page_ext_put(struct page_ext *page_ext); +static inline void *page_ext_data(struct page_ext *page_ext, + struct page_ext_operations *ops) +{ + return (void *)(page_ext) + ops->offset; +} + static inline struct page_ext *page_ext_next(struct page_ext *curr) { void *next = curr; diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 5cb7bd2078ec..d8f344840643 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -144,9 +144,4 @@ static inline void set_page_idle(struct page *page) { folio_set_idle(page_folio(page)); } - -static inline void clear_page_idle(struct page *page) -{ - folio_clear_idle(page_folio(page)); -} #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 119a0c9d2a8b..debdc25f08b9 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -11,7 +11,8 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned short order); extern void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask); -extern void __split_page_owner(struct page *page, unsigned int nr); +extern void __split_page_owner(struct page *page, int old_order, + int new_order); extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); @@ -31,10 +32,11 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } -static inline void split_page_owner(struct page *page, unsigned int nr) +static inline void split_page_owner(struct page *page, int old_order, + int new_order) { if (static_branch_unlikely(&page_owner_inited)) - __split_page_owner(page, nr); + __split_page_owner(page, old_order, new_order); } static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { @@ -56,11 +58,11 @@ static inline void reset_page_owner(struct page *page, unsigned short order) { } static inline void set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { } -static inline void split_page_owner(struct page *page, - unsigned short order) +static inline void split_page_owner(struct page *page, int old_order, + int new_order) { } static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 01e16c7696ec..6722941c7cb8 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -14,18 +14,13 @@ extern struct static_key_true page_table_check_disabled; extern struct page_ext_operations page_table_check_ops; void __page_table_check_zero(struct page *page, unsigned int order); -void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t pte); -void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, - pmd_t pmd); -void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, - pud_t pud); -void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); -void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd); -void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte); +void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); +void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); +void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, + unsigned int nr); +void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd); +void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); @@ -46,61 +41,55 @@ static inline void page_table_check_free(struct page *page, unsigned int order) __page_table_check_zero(page, order); } -static inline void page_table_check_pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t pte) +static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pte_clear(mm, addr, pte); + __page_table_check_pte_clear(mm, pte); } -static inline void page_table_check_pmd_clear(struct mm_struct *mm, - unsigned long addr, pmd_t pmd) +static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_clear(mm, addr, pmd); + __page_table_check_pmd_clear(mm, pmd); } -static inline void page_table_check_pud_clear(struct mm_struct *mm, - unsigned long addr, pud_t pud) +static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_clear(mm, addr, pud); + __page_table_check_pud_clear(mm, pud); } -static inline void page_table_check_pte_set(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, - pte_t pte) +static inline void page_table_check_ptes_set(struct mm_struct *mm, + pte_t *ptep, pte_t pte, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pte_set(mm, addr, ptep, pte); + __page_table_check_ptes_set(mm, ptep, pte, nr); } -static inline void page_table_check_pmd_set(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp, +static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_set(mm, addr, pmdp, pmd); + __page_table_check_pmd_set(mm, pmdp, pmd); } -static inline void page_table_check_pud_set(struct mm_struct *mm, - unsigned long addr, pud_t *pudp, +static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_set(mm, addr, pudp, pud); + __page_table_check_pud_set(mm, pudp, pud); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, @@ -123,35 +112,29 @@ static inline void page_table_check_free(struct page *page, unsigned int order) { } -static inline void page_table_check_pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t pte) +static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { } -static inline void page_table_check_pmd_clear(struct mm_struct *mm, - unsigned long addr, pmd_t pmd) +static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { } -static inline void page_table_check_pud_clear(struct mm_struct *mm, - unsigned long addr, pud_t pud) +static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { } -static inline void page_table_check_pte_set(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, - pte_t pte) +static inline void page_table_check_ptes_set(struct mm_struct *mm, + pte_t *ptep, pte_t pte, unsigned int nr) { } -static inline void page_table_check_pmd_set(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp, +static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { } -static inline void page_table_check_pud_set(struct mm_struct *mm, - unsigned long addr, pud_t *pudp, +static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { } diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e83c4c095041..3f2409b968ec 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,14 +41,14 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER) +#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #else /* CONFIG_HUGETLB_PAGE */ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_ORDER +#define pageblock_order MAX_PAGE_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 716953ee1ebd..2df35e65557d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -203,6 +203,10 @@ enum mapping_flags { /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, + AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ + AS_UNMOVABLE, /* The mapping cannot be moved, ever */ }; /** @@ -273,6 +277,52 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping) return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } +static inline bool mapping_release_always(const struct address_space *mapping) +{ + return test_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline void mapping_set_release_always(struct address_space *mapping) +{ + set_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline void mapping_clear_release_always(struct address_space *mapping) +{ + clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); +} + +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_unmovable(struct address_space *mapping) +{ + /* + * It's expected unmovable mappings are also unevictable. Compaction + * migrate scanner (isolate_migratepages_block()) relies on this to + * reduce page locking. + */ + set_bit(AS_UNEVICTABLE, &mapping->flags); + set_bit(AS_UNMOVABLE, &mapping->flags); +} + +static inline bool mapping_unmovable(struct address_space *mapping) +{ + return test_bit(AS_UNMOVABLE, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; @@ -373,23 +423,31 @@ static inline struct address_space *folio_file_mapping(struct folio *folio) return folio->mapping; } -static inline struct address_space *page_file_mapping(struct page *page) -{ - return folio_file_mapping(page_folio(page)); -} - -/* - * For file cache pages, return the address_space, otherwise return NULL +/** + * folio_flush_mapping - Find the file mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Anonymous folios return NULL, even if they're in + * the swap cache. Other kinds of folio also return NULL. + * + * This is ONLY used by architecture cache flushing code. If you aren't + * writing cache flushing code, you want either folio_mapping() or + * folio_file_mapping(). */ -static inline struct address_space *page_mapping_file(struct page *page) +static inline struct address_space *folio_flush_mapping(struct folio *folio) { - struct folio *folio = page_folio(page); - if (unlikely(folio_test_swapcache(folio))) return NULL; + return folio_mapping(folio); } +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); +} + /** * folio_inode - Get the host inode for this folio. * @folio: The folio. @@ -470,6 +528,19 @@ static inline void *detach_page_private(struct page *page) return folio_detach_private(page_folio(page)); } +/* + * There are some parts of the kernel which assume that PMD entries + * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, + * limit the maximum allocation order to PMD size. I'm not aware of any + * assumptions about maximum order if THP are disabled, but 8 seems like + * a good order (that's 1MB if you're using 4kB pages) + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#else +#define MAX_PAGECACHE_ORDER 8 +#endif + #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else @@ -501,22 +572,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); -#define FGP_ACCESSED 0x00000001 -#define FGP_LOCK 0x00000002 -#define FGP_CREAT 0x00000004 -#define FGP_WRITE 0x00000008 -#define FGP_NOFS 0x00000010 -#define FGP_NOWAIT 0x00000020 -#define FGP_FOR_MMAP 0x00000040 -#define FGP_STABLE 0x00000080 +/** + * typedef fgf_t - Flags for getting folios from the page cache. + * + * Most users of the page cache will not need to use these flags; + * there are convenience functions such as filemap_get_folio() and + * filemap_lock_folio(). For users which need more control over exactly + * what is done with the folios, these flags to __filemap_get_folio() + * are available. + * + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. + * * %FGP_CREAT - If no folio is present then a new folio is allocated, + * added to the page cache and the VM's LRU list. The folio is + * returned locked. + * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the + * folio is already in cache. If the folio was allocated, unlock it + * before returning so the caller can do the same dance. + * * %FGP_WRITE - The folio will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't block on the folio lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() + * implementation. + */ +typedef unsigned int __bitwise fgf_t; + +#define FGP_ACCESSED ((__force fgf_t)0x00000001) +#define FGP_LOCK ((__force fgf_t)0x00000002) +#define FGP_CREAT ((__force fgf_t)0x00000004) +#define FGP_WRITE ((__force fgf_t)0x00000008) +#define FGP_NOFS ((__force fgf_t)0x00000010) +#define FGP_NOWAIT ((__force fgf_t)0x00000020) +#define FGP_FOR_MMAP ((__force fgf_t)0x00000040) +#define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) +/** + * fgf_set_order - Encode a length in the fgf_t flags. + * @size: The suggested size of the folio to create. + * + * The caller of __filemap_get_folio() can use this to suggest a preferred + * size for the folio that is created. If there is already a folio at + * the index, it will be returned, no matter what its size. If a folio + * is freshly created, it may be of a different size than requested + * due to alignment constraints, memory pressure, or the presence of + * other folios at nearby indices. + */ +static inline fgf_t fgf_set_order(size_t size) +{ + unsigned int shift = ilog2(size); + + if (shift <= PAGE_SHIFT) + return 0; + return (__force fgf_t)((shift - PAGE_SHIFT) << 26); +} + void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); /** * filemap_get_folio - Find and get a folio. @@ -590,7 +708,7 @@ static inline struct page *find_get_page(struct address_space *mapping, } static inline struct page *find_get_page_flags(struct address_space *mapping, - pgoff_t offset, int fgp_flags) + pgoff_t offset, fgf_t fgp_flags) { return pagecache_get_page(mapping, offset, fgp_flags, 0); } @@ -705,9 +823,6 @@ static inline pgoff_t folio_next_index(struct folio *folio) */ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return &folio->page; return folio_page(folio, index & (folio_nr_pages(folio) - 1)); } @@ -723,9 +838,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (folio_test_hugetlb(folio)) - return folio->index == index; return index - folio_index(folio) < folio_nr_pages(folio); } @@ -783,10 +895,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping, } /* - * Get index of the page within radix-tree (but not for hugetlb pages). - * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). */ -static inline pgoff_t page_to_index(struct page *page) +static inline pgoff_t page_to_pgoff(struct page *page) { struct page *head; @@ -801,19 +912,6 @@ static inline pgoff_t page_to_index(struct page *page) return head->index + page - head; } -extern pgoff_t hugetlb_basepage_index(struct page *page); - -/* - * Get the offset in PAGE_SIZE (even for hugetlb pages). - * (TODO: hugetlb pages should have ->index in PAGE_SIZE) - */ -static inline pgoff_t page_to_pgoff(struct page *page) -{ - if (unlikely(PageHuge(page))) - return hugetlb_basepage_index(page); - return page_to_index(page); -} - /* * Return byte-offset into filesystem object for page. */ @@ -850,24 +948,16 @@ static inline loff_t folio_file_pos(struct folio *folio) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). - * (TODO: hugetlb folios should have ->index in PAGE_SIZE) */ static inline pgoff_t folio_pgoff(struct folio *folio) { - if (unlikely(folio_test_hugetlb(folio))) - return hugetlb_basepage_index(&folio->page); return folio->index; } -extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, - unsigned long address); - static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { pgoff_t pgoff; - if (unlikely(is_vm_hugetlb_page(vma))) - return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff; @@ -900,8 +990,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, - unsigned int flags); +vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); @@ -1005,11 +1094,13 @@ static inline int folio_lock_killable(struct folio *folio) * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ -static inline bool folio_lock_or_retry(struct folio *folio, - struct mm_struct *mm, unsigned int flags) +static inline vm_fault_t folio_lock_or_retry(struct folio *folio, + struct vm_fault *vmf) { might_sleep(); - return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); + if (!folio_trylock(folio)) + return __folio_lock_or_retry(folio, vmf); + return 0; } /* @@ -1044,11 +1135,7 @@ static inline void wait_on_page_locked(struct page *page) folio_wait_locked(page_folio(page)); } -static inline int wait_on_page_locked_killable(struct page *page) -{ - return folio_wait_locked_killable(page_folio(page)); -} - +void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 87cc678adc85..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include <linux/types.h> -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; @@ -27,6 +27,7 @@ struct folio; */ struct folio_batch { unsigned char nr; + unsigned char i; bool percpu_pvec_drained; struct folio *folios[PAGEVEC_SIZE]; }; @@ -40,12 +41,14 @@ struct folio_batch { static inline void folio_batch_init(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->i = 0; fbatch->percpu_pvec_drained = false; } static inline void folio_batch_reinit(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->i = 0; } static inline unsigned int folio_batch_count(struct folio_batch *fbatch) @@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch, return folio_batch_space(fbatch); } +/** + * folio_batch_next - Return the next folio to process. + * @fbatch: The folio batch being processed. + * + * Use this function to implement a queue of folios. + * + * Return: The next folio in the queue, or NULL if the queue is empty. + */ +static inline struct folio *folio_batch_next(struct folio_batch *fbatch) +{ + if (fbatch->i == fbatch->nr) + return NULL; + return fbatch->folios[fbatch->i++]; +} + void __folio_batch_release(struct folio_batch *pvec); static inline void folio_batch_release(struct folio_batch *fbatch) diff --git a/include/linux/parport.h b/include/linux/parport.h index 999eddd619b7..fff39bc30629 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -180,8 +180,6 @@ struct ieee1284_info { struct semaphore irq; }; -#define PARPORT_NAME_MAX_LEN 15 - /* A parallel port */ struct parport { unsigned long base; /* base address */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 6b1301e2498e..3a4860bd2758 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -93,6 +93,6 @@ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) /* for DT-based PCI controllers that support ECAM */ int pci_host_common_probe(struct platform_device *pdev); -int pci_host_common_remove(struct platform_device *pdev); +void pci_host_common_remove(struct platform_device *pdev); #endif #endif diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 5cb694031072..cc2f70d061c8 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -19,13 +19,6 @@ enum pci_epc_interface_type { SECONDARY_INTERFACE, }; -enum pci_epc_irq_type { - PCI_EPC_IRQ_UNKNOWN, - PCI_EPC_IRQ_LEGACY, - PCI_EPC_IRQ_MSI, - PCI_EPC_IRQ_MSIX, -}; - static inline const char * pci_epc_interface_string(enum pci_epc_interface_type type) { @@ -79,7 +72,7 @@ struct pci_epc_ops { u16 interrupts, enum pci_barno, u32 offset); int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - enum pci_epc_irq_type type, u16 interrupt_num); + unsigned int type, u16 interrupt_num); int (*map_msi_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size, u32 *msi_data, @@ -122,7 +115,7 @@ struct pci_epc_mem { * struct pci_epc - represents the PCI EPC device * @dev: PCI EPC device * @pci_epf: list of endpoint functions present in this EPC device - * list_lock: Mutex for protecting pci_epf list + * @list_lock: Mutex for protecting pci_epf list * @ops: function pointers for performing endpoint operations * @windows: array of address space of the endpoint controller * @mem: first window of the endpoint controller, which corresponds to @@ -153,15 +146,44 @@ struct pci_epc { }; /** + * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. + * @BAR_FIXED: The BAR mask is fixed by the hardware. + * @BAR_RESERVED: The BAR should not be touched by an EPF driver. + */ +enum pci_epc_bar_type { + BAR_PROGRAMMABLE = 0, + BAR_FIXED, + BAR_RESERVED, +}; + +/** + * struct pci_epc_bar_desc - hardware description for a BAR + * @type: the type of the BAR + * @fixed_size: the fixed size, only applicable if type is BAR_FIXED_MASK. + * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR + * should be configured as 32-bit or 64-bit, the EPF driver must + * configure this BAR as 64-bit. Additionally, the BAR succeeding + * this BAR must be set to type BAR_RESERVED. + * + * only_64bit should not be set on a BAR of type BAR_RESERVED. + * (If BARx is a 64-bit BAR that an EPF driver is not allowed to + * touch, then both BARx and BARx+1 must be set to type + * BAR_RESERVED.) + */ +struct pci_epc_bar_desc { + enum pci_epc_bar_type type; + u64 fixed_size; + bool only_64bit; +}; + +/** * struct pci_epc_features - features supported by a EPC device per function * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up * @core_init_notifier: indicate cores that can notify about their availability * for initialization * @msi_capable: indicate if the endpoint function has MSI capability * @msix_capable: indicate if the endpoint function has MSI-X capability - * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver - * @bar_fixed_64bit: bitmap to indicate fixed 64bit BARs - * @bar_fixed_size: Array specifying the size supported by each BAR + * @bar: array specifying the hardware description for each BAR * @align: alignment size required for BAR buffer allocation */ struct pci_epc_features { @@ -169,9 +191,7 @@ struct pci_epc_features { unsigned int core_init_notifier : 1; unsigned int msi_capable : 1; unsigned int msix_capable : 1; - u8 reserved_bar; - u8 bar_fixed_64bit; - u64 bar_fixed_size[PCI_STD_NUM_BARS]; + struct pci_epc_bar_desc bar[PCI_STD_NUM_BARS]; size_t align; }; @@ -229,7 +249,7 @@ int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, u32 entry_size, u32 *msi_data, u32 *msi_addr_offset); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - enum pci_epc_irq_type type, u16 interrupt_num); + unsigned int type, u16 interrupt_num); int pci_epc_start(struct pci_epc *epc); void pci_epc_stop(struct pci_epc *epc); const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 3f44b6aec477..adee6a1b35db 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -15,6 +15,7 @@ #include <linux/pci.h> struct pci_epf; +struct pci_epc_features; enum pci_epc_interface_type; enum pci_barno { @@ -68,7 +69,7 @@ struct pci_epf_ops { }; /** - * struct pci_epf_event_ops - Callbacks for capturing the EPC events + * struct pci_epc_event_ops - Callbacks for capturing the EPC events * @core_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event @@ -98,7 +99,7 @@ struct pci_epf_driver { void (*remove)(struct pci_epf *epf); struct device_driver driver; - struct pci_epf_ops *ops; + const struct pci_epf_ops *ops; struct module *owner; struct list_head epf_group; const struct pci_epf_device_id *id_table; @@ -216,7 +217,8 @@ int __pci_epf_register_driver(struct pci_epf_driver *driver, struct module *owner); void pci_epf_unregister_driver(struct pci_epf_driver *driver); void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, - size_t align, enum pci_epc_interface_type type); + const struct pci_epc_features *epc_features, + enum pci_epc_interface_type type); void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, enum pci_epc_interface_type type); int pci_epf_bind(struct pci_epf *epf); diff --git a/include/linux/pci.h b/include/linux/pci.h index c69a2cc1f412..16493426a04f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -23,7 +23,7 @@ #ifndef LINUX_PCI_H #define LINUX_PCI_H - +#include <linux/args.h> #include <linux/mod_devicetable.h> #include <linux/types.h> @@ -366,8 +366,8 @@ struct pci_dev { pci_power_t current_state; /* Current operating state. In ACPI, this is D0-D3, D0 being fully functional, and D3 being off. */ - unsigned int imm_ready:1; /* Supports Immediate Readiness */ u8 pm_cap; /* PM capability offset */ + unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ @@ -390,11 +390,11 @@ struct pci_dev { unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ + u16 l1ss; /* L1SS Capability pointer */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ - u16 l1ss; /* L1SS Capability pointer */ #endif unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */ unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ @@ -464,12 +464,13 @@ struct pci_dev { unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ + unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ + spinlock_t pcie_cap_lock; /* Protects RMW ops in capability accessors */ u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ @@ -712,6 +713,31 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +/** + * pci_is_vga - check if the PCI device is a VGA device + * @pdev: PCI device + * + * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define + * VGA Base Class and Sub-Classes: + * + * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible + * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) + * + * Return true if the PCI device is a VGA device and uses the legacy VGA + * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and + * aliases). + */ +static inline bool pci_is_vga(struct pci_dev *pdev) +{ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + return true; + + if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA) + return true; + + return false; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -860,7 +886,6 @@ struct module; /** * struct pci_driver - PCI driver structure - * @node: List of driver structures. * @name: Driver name. * @id_table: Pointer to table of device IDs the driver is * interested in. Most drivers should export this @@ -915,7 +940,6 @@ struct module; * own I/O address space. */ struct pci_driver { - struct list_head node; const char *name; const struct pci_device_id *id_table; /* Must be non-NULL for probe to be called */ int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ @@ -1048,11 +1072,13 @@ enum { PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* Scan all, not just dev 0 */ }; -#define PCI_IRQ_LEGACY (1 << 0) /* Allow legacy interrupts */ +#define PCI_IRQ_INTX (1 << 0) /* Allow INTx interrupts */ #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ +#define PCI_IRQ_LEGACY PCI_IRQ_INTX /* Deprecated! Use PCI_IRQ_INTX */ + /* These external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI @@ -1145,6 +1171,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); struct pci_dev *pci_dev_get(struct pci_dev *dev); void pci_dev_put(struct pci_dev *dev); +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) void pci_remove_bus(struct pci_bus *b); void pci_stop_and_remove_bus_device(struct pci_dev *dev); void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); @@ -1180,6 +1207,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from); + int pci_dev_present(const struct pci_device_id *ids); int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn, @@ -1212,16 +1241,47 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val); int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val); int pci_write_config_word(const struct pci_dev *dev, int where, u16 val); int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val); +void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos, + u32 clear, u32 set); int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val); int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val); -int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set); +int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos, + u16 clear, u16 set); +int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos, + u16 clear, u16 set); int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos, u32 clear, u32 set); +/** + * pcie_capability_clear_and_set_word - RMW accessor for PCI Express Capability Registers + * @dev: PCI device structure of the PCI Express device + * @pos: PCI Express Capability Register + * @clear: Clear bitmask + * @set: Set bitmask + * + * Perform a Read-Modify-Write (RMW) operation using @clear and @set + * bitmasks on PCI Express Capability Register at @pos. Certain PCI Express + * Capability Registers are accessed concurrently in RMW fashion, hence + * require locking which is handled transparently to the caller. + */ +static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, + int pos, + u16 clear, u16 set) +{ + switch (pos) { + case PCI_EXP_LNKCTL: + case PCI_EXP_RTCTL: + return pcie_capability_clear_and_set_word_locked(dev, pos, + clear, set); + default: + return pcie_capability_clear_and_set_word_unlocked(dev, pos, + clear, set); + } +} + static inline int pcie_capability_set_word(struct pci_dev *dev, int pos, u16 set) { @@ -1308,6 +1368,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps); u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +int pcie_link_speed_mbps(struct pci_dev *pdev); void pcie_print_link_status(struct pci_dev *dev); int pcie_reset_flr(struct pci_dev *dev, bool probe); int pcie_flr(struct pci_dev *dev); @@ -1361,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1403,7 +1465,6 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); -void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); @@ -1565,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -1595,6 +1658,8 @@ struct msix_entry { u16 entry; /* Driver uses to specify entry, OS writes */ }; +struct msi_domain_template; + #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); @@ -1627,6 +1692,11 @@ void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1690,6 +1760,25 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline bool pci_create_ims_domain(struct pci_dev *pdev, + const struct msi_domain_template *template, + unsigned int hwsize, void *data) +{ return false; } + +static inline struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, + union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOSYS, }; + + return map; +} + +static inline void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map) +{ +} + #endif /** @@ -1748,6 +1837,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1758,6 +1848,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } @@ -1790,6 +1882,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), @@ -1895,6 +1988,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } @@ -1932,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, @@ -2043,14 +2141,14 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma); (pci_resource_end((dev), (bar)) ? \ resource_size(pci_resource_n((dev), (bar))) : 0) -#define __pci_dev_for_each_res0(dev, res, ...) \ - for (unsigned int __b = 0; \ - res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \ +#define __pci_dev_for_each_res0(dev, res, ...) \ + for (unsigned int __b = 0; \ + __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \ __b++) -#define __pci_dev_for_each_res1(dev, res, __b) \ - for (__b = 0; \ - res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \ +#define __pci_dev_for_each_res1(dev, res, __b) \ + for (__b = 0; \ + __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \ __b++) #define pci_dev_for_each_resource(dev, res, ...) \ @@ -2260,6 +2358,11 @@ int pcibios_alloc_irq(struct pci_dev *dev); void pcibios_free_irq(struct pci_dev *dev); resource_size_t pcibios_default_alignment(void); +#if !defined(HAVE_PCI_MMAP) && !defined(ARCH_GENERIC_PCI_MMAP_RESOURCE) +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); +#endif + #if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG) void __init pci_mmcfg_early_init(void); void __init pci_mmcfg_late_init(void); @@ -2414,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) return NULL; } +static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +{ + return dev->error_state == pci_channel_io_perm_failure; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, @@ -2582,14 +2690,6 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -struct msi_domain_template; - -bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, - unsigned int hwsize, void *data); -struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, - const struct irq_affinity_desc *affdesc); -void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); - #include <linux/dma-mapping.h> #define pci_printk(level, pdev, fmt, arg...) \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2dc75df1437f..c547d1d4feb1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc @@ -576,7 +578,12 @@ #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb +#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 +#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb +#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b +#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 @@ -1760,6 +1767,10 @@ #define PCI_SUBDEVICE_ID_AT_2700FX 0x2701 #define PCI_SUBDEVICE_ID_AT_2701FX 0x2703 +#define PCI_VENDOR_ID_ASIX 0x125b +#define PCI_DEVICE_ID_ASIX_AX99100 0x9100 +#define PCI_DEVICE_ID_ASIX_AX99100_LB 0x9110 + #define PCI_VENDOR_ID_ESS 0x125d #define PCI_DEVICE_ID_ESS_ESS1968 0x1968 #define PCI_DEVICE_ID_ESS_ESS1978 0x1978 @@ -2595,6 +2606,8 @@ #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 +#define PCI_VENDOR_ID_ALIBABA 0x1ded + #define PCI_VENDOR_ID_TEHUTI 0x1fc9 #define PCI_DEVICE_ID_TEHUTI_3009 0x3009 #define PCI_DEVICE_ID_TEHUTI_3010 0x3010 @@ -2644,6 +2657,7 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 +#define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 @@ -2659,8 +2673,10 @@ #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 #define PCI_DEVICE_ID_INTEL_82425 0x0486 +#define PCI_DEVICE_ID_INTEL_HDA_CML_H 0x06c8 #define PCI_DEVICE_ID_INTEL_MRST_SD0 0x0807 #define PCI_DEVICE_ID_INTEL_MRST_SD1 0x0808 +#define PCI_DEVICE_ID_INTEL_HDA_OAKTRAIL 0x080a #define PCI_DEVICE_ID_INTEL_MFD_SD 0x0820 #define PCI_DEVICE_ID_INTEL_MFD_SDIO1 0x0821 #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 @@ -2670,15 +2686,19 @@ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 +#define PCI_DEVICE_ID_INTEL_HDA_HSW_0 0x0a0c +#define PCI_DEVICE_ID_INTEL_HDA_HSW_2 0x0c0c #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 +#define PCI_DEVICE_ID_INTEL_HDA_HSW_3 0x0d0c +#define PCI_DEVICE_ID_INTEL_HDA_BYT 0x0f04 +#define PCI_DEVICE_ID_INTEL_SST_BYT 0x0f28 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 #define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 #define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 +#define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 -#define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e #define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230 @@ -2704,20 +2724,26 @@ #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE 0x1576 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 +#define PCI_DEVICE_ID_INTEL_HDA_BDW 0x160c #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2 #define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 +#define PCI_DEVICE_ID_INTEL_HDA_CPT 0x1c20 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_HDA_PBG 0x1d20 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_HDA_PPT 0x1e20 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_VMD_201D 0x201d +#define PCI_DEVICE_ID_INTEL_HDA_BSW 0x2284 +#define PCI_DEVICE_ID_INTEL_SST_BSW 0x22a8 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 @@ -2772,6 +2798,8 @@ #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 #define PCI_DEVICE_ID_INTEL_82860_HB 0x2531 #define PCI_DEVICE_ID_INTEL_E7501_MCH 0x254c +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82845G_HB 0x2560 #define PCI_DEVICE_ID_INTEL_82845G_IG 0x2562 #define PCI_DEVICE_ID_INTEL_82865_HB 0x2570 @@ -2793,12 +2821,14 @@ #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 #define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641 #define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642 +#define PCI_DEVICE_ID_INTEL_HDA_ICH6 0x2668 #define PCI_DEVICE_ID_INTEL_ICH6_16 0x266a #define PCI_DEVICE_ID_INTEL_ICH6_17 0x266d #define PCI_DEVICE_ID_INTEL_ICH6_18 0x266e #define PCI_DEVICE_ID_INTEL_ICH6_19 0x266f #define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670 #define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698 +#define PCI_DEVICE_ID_INTEL_HDA_ESB2 0x269a #define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b #define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e #define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 @@ -2806,11 +2836,12 @@ #define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 #define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0 #define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2 +#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 -#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 #define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc #define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd +#define PCI_DEVICE_ID_INTEL_HDA_ICH7 0x27d8 #define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da #define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd #define PCI_DEVICE_ID_INTEL_ICH7_20 0x27de @@ -2821,17 +2852,20 @@ #define PCI_DEVICE_ID_INTEL_ICH8_3 0x2814 #define PCI_DEVICE_ID_INTEL_ICH8_4 0x2815 #define PCI_DEVICE_ID_INTEL_ICH8_5 0x283e +#define PCI_DEVICE_ID_INTEL_HDA_ICH8 0x284b #define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850 #define PCI_DEVICE_ID_INTEL_VMD_28C0 0x28c0 #define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910 -#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 #define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912 #define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913 #define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914 -#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 -#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 +#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 +#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 +#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930 +#define PCI_DEVICE_ID_INTEL_HDA_ICH9_0 0x293e +#define PCI_DEVICE_ID_INTEL_HDA_ICH9_1 0x293f #define PCI_DEVICE_ID_INTEL_I7_MCR 0x2c18 #define PCI_DEVICE_ID_INTEL_I7_MC_TAD 0x2c19 #define PCI_DEVICE_ID_INTEL_I7_MC_RAS 0x2c1a @@ -2848,8 +2882,8 @@ #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR 0x2c31 #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK 0x2c32 #define PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC 0x2c33 -#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41 #define PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT 0x2c40 +#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE 0x2c50 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT 0x2c51 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2 0x2c70 @@ -2883,6 +2917,7 @@ #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2 0x2db1 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2 0x2db2 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2 0x2db3 +#define PCI_DEVICE_ID_INTEL_HDA_GML 0x3198 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 #define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 #define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a @@ -2893,12 +2928,13 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 #define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 +#define PCI_DEVICE_ID_INTEL_HDA_ICL_LP 0x34c8 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 -#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c -#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 +#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c +#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 #define PCI_DEVICE_ID_INTEL_E7320_MCH 0x3592 #define PCI_DEVICE_ID_INTEL_MCH_PA 0x3595 @@ -2908,11 +2944,11 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c #define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f #define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 -#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b -#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 #define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 #define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 @@ -2925,14 +2961,19 @@ #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8 #define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9 +#define PCI_DEVICE_ID_INTEL_HDA_ICL_N 0x38c8 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 #define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a #define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 +#define PCI_DEVICE_ID_INTEL_HDA_ICH10_0 0x3a3e #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 +#define PCI_DEVICE_ID_INTEL_HDA_ICH10_1 0x3a6e #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f +#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_0 0x3b56 +#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_1 0x3b57 #define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 #define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 #define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 @@ -2943,16 +2984,12 @@ #define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 #define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e #define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f -#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46 -#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0 -#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1 -#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4 -#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5 #define PCI_DEVICE_ID_INTEL_UNC_QPI0 0x3c41 #define PCI_DEVICE_ID_INTEL_UNC_QPI1 0x3c42 #define PCI_DEVICE_ID_INTEL_UNC_R2PCIE 0x3c43 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI0 0x3c44 #define PCI_DEVICE_ID_INTEL_UNC_R3QPI1 0x3c45 +#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */ @@ -2964,17 +3001,40 @@ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */ +#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0 +#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1 +#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4 +#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5 #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ +#define PCI_DEVICE_ID_INTEL_HDA_ICL_H 0x3dc8 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 +#define PCI_DEVICE_ID_INTEL_HDA_TGL_H 0x43c8 +#define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d +#define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55 +#define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58 +#define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91 +#define PCI_DEVICE_ID_INTEL_HDA_DG2_2 0x4f92 #define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031 #define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_P 0x51c8 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_PS 0x51c9 +#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_0 0x51ca +#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_1 0x51cb +#define PCI_DEVICE_ID_INTEL_HDA_ADL_M 0x51cc +#define PCI_DEVICE_ID_INTEL_HDA_ADL_PX 0x51cd +#define PCI_DEVICE_ID_INTEL_HDA_RPL_M 0x51ce +#define PCI_DEVICE_ID_INTEL_HDA_RPL_PX 0x51cf +#define PCI_DEVICE_ID_INTEL_HDA_ADL_N 0x54c8 +#define PCI_DEVICE_ID_INTEL_HDA_APL 0x5a98 #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 @@ -3008,8 +3068,14 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_HDA_ARL 0x7728 +#define PCI_DEVICE_ID_INTEL_HDA_RPL_S 0x7a50 +#define PCI_DEVICE_ID_INTEL_HDA_ADL_S 0x7ad0 +#define PCI_DEVICE_ID_INTEL_HDA_MTL 0x7e28 +#define PCI_DEVICE_ID_INTEL_HDA_ARL_S 0x7f50 #define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 #define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a +#define PCI_DEVICE_ID_INTEL_HDA_POULSBO 0x811b #define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183 #define PCI_DEVICE_ID_INTEL_ITC_LPC 0x8186 #define PCI_DEVICE_ID_INTEL_82454GX 0x84c4 @@ -3018,9 +3084,31 @@ #define PCI_DEVICE_ID_INTEL_82454NX 0x84cb #define PCI_DEVICE_ID_INTEL_84460GX 0x84ea #define PCI_DEVICE_ID_INTEL_IXP4XX 0x8500 +#define PCI_DEVICE_ID_INTEL_HDA_LPT 0x8c20 +#define PCI_DEVICE_ID_INTEL_HDA_9_SERIES 0x8ca0 +#define PCI_DEVICE_ID_INTEL_HDA_WBG_0 0x8d20 +#define PCI_DEVICE_ID_INTEL_HDA_WBG_1 0x8d21 #define PCI_DEVICE_ID_INTEL_IXP2800 0x9004 +#define PCI_DEVICE_ID_INTEL_HDA_LKF 0x98c8 #define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b +#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_0 0x9c20 +#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_1 0x9c21 +#define PCI_DEVICE_ID_INTEL_HDA_WPT_LP 0x9ca0 +#define PCI_DEVICE_ID_INTEL_HDA_SKL_LP 0x9d70 +#define PCI_DEVICE_ID_INTEL_HDA_KBL_LP 0x9d71 +#define PCI_DEVICE_ID_INTEL_HDA_CNL_LP 0x9dc8 +#define PCI_DEVICE_ID_INTEL_HDA_TGL_LP 0xa0c8 +#define PCI_DEVICE_ID_INTEL_HDA_SKL 0xa170 +#define PCI_DEVICE_ID_INTEL_HDA_KBL 0xa171 +#define PCI_DEVICE_ID_INTEL_HDA_LBG_0 0xa1f0 +#define PCI_DEVICE_ID_INTEL_HDA_LBG_1 0xa270 +#define PCI_DEVICE_ID_INTEL_HDA_KBL_H 0xa2f0 +#define PCI_DEVICE_ID_INTEL_HDA_CNL_H 0xa348 +#define PCI_DEVICE_ID_INTEL_HDA_CML_S 0xa3f0 +#define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 +#define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 +#define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 #define PCI_VENDOR_ID_WANGXUN 0x8088 diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index ff99cf7a5d0d..da3a6c30f6d2 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -20,12 +20,20 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 +/* device vendor OUI */ +#define DW_OUI_WX 0x0018fc80 + +/* dev_flag */ +#define DW_DEV_TXGBE BIT(0) + struct xpcs_id; struct dw_xpcs { struct mdio_device *mdiodev; const struct xpcs_id *id; struct phylink_pcs pcs; + phy_interface_t interface; + int dev_flag; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index bcba7fda3cc9..4b4e9a98b37b 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -818,6 +818,367 @@ struct pds_vdpa_set_features_cmd { __le64 features; }; +#define PDS_LM_DEVICE_STATE_LENGTH 65536 +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) + +/* + * enum pds_lm_cmd_opcode - Live Migration Device commands + */ +enum pds_lm_cmd_opcode { + PDS_LM_CMD_HOST_VF_STATUS = 1, + + /* Device state commands */ + PDS_LM_CMD_STATE_SIZE = 16, + PDS_LM_CMD_SUSPEND = 18, + PDS_LM_CMD_SUSPEND_STATUS = 19, + PDS_LM_CMD_RESUME = 20, + PDS_LM_CMD_SAVE = 21, + PDS_LM_CMD_RESTORE = 22, + + /* Dirty page tracking commands */ + PDS_LM_CMD_DIRTY_STATUS = 32, + PDS_LM_CMD_DIRTY_ENABLE = 33, + PDS_LM_CMD_DIRTY_DISABLE = 34, + PDS_LM_CMD_DIRTY_READ_SEQ = 35, + PDS_LM_CMD_DIRTY_WRITE_ACK = 36, +}; + +/** + * struct pds_lm_cmd - generic command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Structure padding to 60 Bytes + */ +struct pds_lm_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[56]; +}; + +/** + * struct pds_lm_state_size_cmd - STATE_SIZE command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_state_size_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_state_size_comp - STATE_SIZE command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @size: Size of the device state + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_lm_state_size_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 size; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +enum pds_lm_suspend_resume_type { + PDS_LM_SUSPEND_RESUME_TYPE_FULL = 0, + PDS_LM_SUSPEND_RESUME_TYPE_P2P = 1, +}; + +/** + * struct pds_lm_suspend_cmd - SUSPEND command + * @opcode: Opcode PDS_LM_CMD_SUSPEND + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_suspend_status_cmd - SUSPEND status command + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_resume_cmd - RESUME command + * @opcode: Opcode PDS_LM_CMD_RESUME + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of resume (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_resume_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Word boundary padding + */ +struct pds_lm_sg_elem { + __le64 addr; + __le32 len; + __le16 rsvd[2]; +}; + +/** + * struct pds_lm_save_cmd - SAVE command + * @opcode: Opcode PDS_LM_CMD_SAVE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_save_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * struct pds_lm_restore_cmd - RESTORE command + * @opcode: Opcode PDS_LM_CMD_RESTORE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_restore_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * union pds_lm_dev_state - device state information + * @words: Device state words + */ +union pds_lm_dev_state { + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; +}; + +enum pds_lm_host_vf_status { + PDS_LM_STA_NONE = 0, + PDS_LM_STA_IN_PROGRESS, + PDS_LM_STA_MAX, +}; + +/** + * struct pds_lm_dirty_region_info - Memory region info for STATUS and ENABLE + * @dma_base: Base address of the DMA-contiguous memory region + * @page_count: Number of pages in the memory region + * @page_size_log2: Log2 page size in the memory region + * @rsvd: Word boundary padding + */ +struct pds_lm_dirty_region_info { + __le64 dma_base; + __le32 page_count; + u8 page_size_log2; + u8 rsvd[3]; +}; + +/** + * struct pds_lm_dirty_status_cmd - DIRTY_STATUS command + * @opcode: Opcode PDS_LM_CMD_DIRTY_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @max_regions: Capacity of the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The minimum of max_regions (from the command) and num_regions (from the + * completion) of struct pds_lm_dirty_region_info will be written to + * regions_dma. + * + * The max_regions may be zero, in which case regions_dma is ignored. In that + * case, the completion will only report the maximum number of regions + * supported by the device, and the number of regions currently enabled. + */ +struct pds_lm_dirty_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 max_regions; + u8 rsvd2[3]; + __le64 regions_dma; +} __packed; + +/** + * enum pds_lm_dirty_bmp_type - Type of dirty page bitmap + * @PDS_LM_DIRTY_BMP_TYPE_NONE: No bitmap / disabled + * @PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK: Seq/Ack bitmap representation + */ +enum pds_lm_dirty_bmp_type { + PDS_LM_DIRTY_BMP_TYPE_NONE = 0, + PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK = 1, +}; + +/** + * struct pds_lm_dirty_status_comp - STATUS command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @max_regions: Maximum number of regions supported by the device + * @num_regions: Number of regions currently enabled + * @bmp_type: Type of dirty bitmap representation + * @rsvd2: Word boundary padding + * @bmp_type_mask: Mask of supported bitmap types, bit index per type + * @rsvd3: Word boundary padding + * @color: Color bit + * + * This completion descriptor is used for STATUS, ENABLE, and DISABLE. + */ +struct pds_lm_dirty_status_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 max_regions; + u8 num_regions; + u8 bmp_type; + u8 rsvd2; + __le32 bmp_type_mask; + u8 rsvd3[3]; + u8 color; +}; + +/** + * struct pds_lm_dirty_enable_cmd - DIRTY_ENABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_ENABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @bmp_type: Type of dirty bitmap representation + * @num_regions: Number of entries in the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The num_regions must be nonzero, and less than or equal to the maximum + * number of regions supported by the device. + * + * The memory regions should not overlap. + * + * The information should be initialized by the driver. The device may modify + * the information on successful completion, such as by size-aligning the + * number of pages in a region. + * + * The modified number of pages will be greater than or equal to the page count + * given in the enable command, and at least as coarsly aligned as the given + * value. For example, the count might be aligned to a multiple of 64, but + * if the value is already a multiple of 128 or higher, it will not change. + * If the driver requires its own minimum alignment of the number of pages, the + * driver should account for that already in the region info of this command. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. + */ +struct pds_lm_dirty_enable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 bmp_type; + u8 num_regions; + u8 rsvd2[2]; + __le64 regions_dma; +} __packed; + +/** + * struct pds_lm_dirty_disable_cmd - DIRTY_DISABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_DISABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * + * Dirty page tracking will be disabled. This may be called in any state, as + * long as dirty page tracking is supported by the device, to ensure that dirty + * page tracking is disabled. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. On + * success, num_regions will be zero. + */ +struct pds_lm_dirty_disable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_dirty_seq_ack_cmd - DIRTY_READ_SEQ or _WRITE_ACK command + * @opcode: Opcode PDS_LM_CMD_DIRTY_[READ_SEQ|WRITE_ACK] + * @rsvd: Word boundary padding + * @vf_id: VF id + * @off_bytes: Byte offset in the bitmap + * @len_bytes: Number of bytes to transfer + * @num_sge: Number of DMA scatter gather elements + * @rsvd2: Word boundary padding + * @sgl_addr: DMA address of scatter gather list + * + * Read bytes from the SEQ bitmap, or write bytes into the ACK bitmap. + * + * This command treats the entire bitmap as a byte buffer. It does not + * distinguish between guest memory regions. The driver should refer to the + * number of pages in each region, according to PDS_LM_CMD_DIRTY_STATUS, to + * determine the region boundaries in the bitmap. Each region will be + * represented by exactly the number of bits as the page count for that region, + * immediately following the last bit of the previous region. + */ +struct pds_lm_dirty_seq_ack_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + __le32 off_bytes; + __le32 len_bytes; + __le16 num_sge; + u8 rsvd2[2]; + __le64 sgl_addr; +} __packed; + +/** + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @status: Current LM status of host VF driver (enum pds_lm_host_status) + */ +struct pds_lm_host_vf_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 status; +}; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -844,6 +1205,17 @@ union pds_core_adminq_cmd { struct pds_vdpa_vq_init_cmd vdpa_vq_init; struct pds_vdpa_vq_reset_cmd vdpa_vq_reset; + struct pds_lm_suspend_cmd lm_suspend; + struct pds_lm_suspend_status_cmd lm_suspend_status; + struct pds_lm_resume_cmd lm_resume; + struct pds_lm_state_size_cmd lm_state_size; + struct pds_lm_save_cmd lm_save; + struct pds_lm_restore_cmd lm_restore; + struct pds_lm_host_vf_status_cmd lm_host_vf_status; + struct pds_lm_dirty_status_cmd lm_dirty_status; + struct pds_lm_dirty_enable_cmd lm_dirty_enable; + struct pds_lm_dirty_disable_cmd lm_dirty_disable; + struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; }; union pds_core_adminq_comp { @@ -868,6 +1240,9 @@ union pds_core_adminq_comp { struct pds_vdpa_vq_init_comp vdpa_vq_init; struct pds_vdpa_vq_reset_comp vdpa_vq_reset; + + struct pds_lm_state_size_comp lm_state_size; + struct pds_lm_dirty_status_comp lm_dirty_status; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 435c8e8161c2..30581e2e04cc 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -34,16 +34,19 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_CORE_STR "Core" #define PDS_DEV_TYPE_VDPA_STR "vDPA" -#define PDS_DEV_TYPE_VFIO_STR "VFio" +#define PDS_DEV_TYPE_VFIO_STR "vfio" #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR +#define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR + +struct pdsc; int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void *pdsc_get_pf_struct(struct pci_dev *vf_pdev); -int pds_client_register(struct pci_dev *pf_pdev, char *devname); -int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id); +int pds_client_register(struct pdsc *pf, char *devname); +int pds_client_unregister(struct pdsc *pf, u16 client_id); #endif /* _PDS_COMMON_H_ */ diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** diff --git a/include/linux/peci.h b/include/linux/peci.h index 06e6ef935297..90e241458ef6 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -42,13 +42,13 @@ struct peci_controller_ops { */ struct peci_controller { struct device dev; - struct peci_controller_ops *ops; + const struct peci_controller_ops *ops; struct mutex bus_lock; /* held for the duration of xfer */ u8 id; }; struct peci_controller *devm_peci_controller_add(struct device *parent, - struct peci_controller_ops *ops); + const struct peci_controller_ops *ops); static inline struct peci_controller *to_peci_controller(void *d) { @@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d) /** * struct peci_device - PECI device * @dev: device object to register PECI device to the device model - * @controller: manages the bus segment hosting this PECI device * @info: PECI device characteristics * @info.family: device family * @info.model: device model diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b3b458442330..8c677f185901 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -35,6 +35,12 @@ #define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ PCPU_MIN_ALLOC_SHIFT) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#define PERCPU_DYNAMIC_SIZE_SHIFT 12 +#else +#define PERCPU_DYNAMIC_SIZE_SHIFT 10 +#endif + /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. @@ -42,7 +48,7 @@ * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than * PERCPU_DYNAMIC_EARLY_SIZE. */ -#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) +#define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -56,9 +62,9 @@ * intelligent way to determine this would be nice. */ #if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (28 << 10) +#define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT) #else -#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT) #endif extern void *pcpu_base_addr; @@ -126,6 +132,7 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); +extern size_t pcpu_alloc_size(void __percpu *__pdata); DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 75b73c83bc9d..3a44dd1e33d2 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -30,22 +30,35 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, - struct lock_class_key *key); +int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, + gfp_t gfp, u32 nr_counters, + struct lock_class_key *key); -#define percpu_counter_init(fbc, value, gfp) \ +#define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, gfp, &__key); \ + __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ + &__key); \ }) -void percpu_counter_destroy(struct percpu_counter *fbc); + +#define percpu_counter_init(fbc, value, gfp) \ + percpu_counter_init_many(fbc, value, gfp, 1) + +void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); +static inline void percpu_counter_destroy(struct percpu_counter *fbc) +{ + percpu_counter_destroy_many(fbc, 1); +} + void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); +bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, + s64 amount, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) @@ -58,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + return __percpu_counter_limited_add(fbc, limit, amount, + percpu_counter_batch); +} + /* * With percpu_counter_add_local() and percpu_counter_sub_local(), counts * are accumulated in local per cpu counter and not in fbc->count until @@ -116,11 +136,27 @@ struct percpu_counter { s64 count; }; +static inline int percpu_counter_init_many(struct percpu_counter *fbc, + s64 amount, gfp_t gfp, + u32 nr_counters) +{ + u32 i; + + for (i = 0; i < nr_counters; i++) + fbc[i].count = amount; + + return 0; +} + static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { - fbc->count = amount; - return 0; + return percpu_counter_init_many(fbc, amount, gfp, 1); +} + +static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, + u32 nr_counters) +{ } static inline void percpu_counter_destroy(struct percpu_counter *fbc) @@ -158,6 +194,27 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) local_irq_restore(flags); } +static inline bool +percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) +{ + unsigned long flags; + bool good = false; + s64 count; + + if (amount == 0) + return true; + + local_irq_save(flags); + count = fbc->count + amount; + if ((amount > 0 && count <= limit) || + (amount < 0 && count >= limit)) { + fbc->count = count; + good = true; + } + local_irq_restore(flags); + return good; +} + /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index a0801f68762b..b3b34f6670cf 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -60,12 +60,6 @@ struct pmu_hw_events { DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; - - /* * When using percpu IRQs, we need a percpu dev_id. Place it here as we * already have to allocate this struct per cpu. */ @@ -187,5 +181,28 @@ void armpmu_free_irq(int irq, int cpu); #endif /* CONFIG_ARM_PMU */ #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" +#define ARMV8_TRBE_PDEV_NAME "arm,trbe" + +/* Why does everything I do descend into this? */ +#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi + +#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) + +#define GEN_PMU_FORMAT_ATTR(name) \ + PMU_FORMAT_ATTR(name, \ + _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI)) + +#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ + ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0)) + +#define ATTR_CFG_GET_FLD(attr, name) \ + _ATTR_CFG_GET_FLD(attr, \ + ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI) #endif /* __ARM_PMU_H__ */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index e3899bd77f5c..46377e134d67 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -215,45 +215,54 @@ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ +/* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \ + ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \ + ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ + ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) /* * PMOVSR: counters overflow flag status reg */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK +#define ARMV8_PMU_OVSR_P GENMASK(30, 0) +#define ARMV8_PMU_OVSR_C BIT(31) +/* Mask for writable bits is both P and C fields */ +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_TH GENMASK_ULL(43, 32) /* arm64 only */ +#define ARMV8_PMU_EVTYPE_TC GENMASK_ULL(63, 61) /* arm64 only */ /* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) /* * PMUSERENR: user enable reg */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ #define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* Mask for writable bits */ +#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ + ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) /* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf +#define ARMV8_PMU_SLOTS GENMASK(7, 0) +#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) +#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) +#define ARMV8_PMU_THWIDTH GENMASK(23, 20) /* * This code is really good diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 43fc892aa7d9..43282e22ebe1 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -6,8 +6,8 @@ * */ -#ifndef _ASM_RISCV_PERF_EVENT_H -#define _ASM_RISCV_PERF_EVENT_H +#ifndef _RISCV_PMU_H +#define _RISCV_PMU_H #include <linux/perf_event.h> #include <linux/ptrace.h> @@ -21,7 +21,7 @@ #define RISCV_MAX_COUNTERS 64 #define RISCV_OP_UNSUPP (-EOPNOTSUPP) -#define RISCV_PMU_PDEV_NAME "riscv-pmu" +#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi" #define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy" #define RISCV_PMU_STOP_FLAG_RESET 1 @@ -55,6 +55,10 @@ struct riscv_pmu { void (*ctr_start)(struct perf_event *event, u64 init_val); void (*ctr_stop)(struct perf_event *event, unsigned long flag); int (*event_map)(struct perf_event *event, u64 *config); + void (*event_init)(struct perf_event *event); + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); + uint8_t (*csr_index)(struct perf_event *event); struct cpu_hw_events __percpu *hw_events; struct hlist_node node; @@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); #endif /* CONFIG_RISCV_PMU */ -#endif /* _ASM_RISCV_PERF_EVENT_H */ +#endif /* _RISCV_PMU_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2166a69e3bf2..d2a15c0c6f8a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -288,10 +288,9 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_EXTENDED_REGS 0x0008 #define PERF_PMU_CAP_EXCLUSIVE 0x0010 #define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0080 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0100 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 struct perf_output_handle; @@ -445,7 +444,8 @@ struct pmu { /* * Will return the value for perf_event_mmap_page::index for this event, - * if no implementation is provided it will default to: event->hw.idx + 1. + * if no implementation is provided it will default to 0 (see + * perf_event_idx_default). */ int (*event_idx) (struct perf_event *event); /*optional */ @@ -704,6 +704,7 @@ struct perf_event { /* The cumulative AND of all event_caps for events in this group. */ int group_caps; + unsigned int group_generation; struct perf_event *group_leader; /* * event->pmu will always point to pmu in which this event belongs. @@ -842,11 +843,11 @@ struct perf_event { }; /* - * ,-----------------------[1:n]----------------------. - * V V - * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event - * ^ ^ | | - * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * ,-----------------------[1:n]------------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event + * | | + * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed @@ -864,6 +865,9 @@ struct perf_event { * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. + * + * perf_event holds a refcount on perf_event_context + * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; @@ -878,6 +882,7 @@ struct perf_event_pmu_context { unsigned int embedded : 1; unsigned int nr_events; + unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; @@ -1138,6 +1143,15 @@ static inline bool branch_sample_priv(const struct perf_event *event) return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; } +static inline bool branch_sample_counters(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; +} + +static inline bool branch_sample_call_stack(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} struct perf_sample_data { /* @@ -1172,6 +1186,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + u64 *br_stack_cntr; union perf_sample_weight weight; union perf_mem_data_src data_src; u64 txn; @@ -1194,7 +1209,8 @@ struct perf_sample_data { PERF_MEM_S(LVL, NA) |\ PERF_MEM_S(SNOOP, NA) |\ PERF_MEM_S(LOCK, NA) |\ - PERF_MEM_S(TLB, NA)) + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) @@ -1248,7 +1264,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, - struct perf_branch_stack *brs) + struct perf_branch_stack *brs, + u64 *brs_cntr) { int size = sizeof(u64); /* nr */ @@ -1256,7 +1273,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); + /* + * The extension space for counters is appended after the + * struct perf_branch_stack. It is used to store the occurrences + * of events of each branch. + */ + if (brs_cntr) + size += brs->nr * sizeof(u64); + data->br_stack = brs; + data->br_stack_cntr = brs_cntr; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } @@ -1316,15 +1342,31 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -is_default_overflow_handler(struct perf_event *event) +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) { - if (likely(event->overflow_handler == perf_event_output_forward)) + if (likely(overflow_handler == perf_event_output_forward)) return true; - if (unlikely(event->overflow_handler == perf_event_output_backward)) + if (unlikely(overflow_handler == perf_event_output_backward)) return true; return false; } +#define is_default_overflow_handler(event) \ + __is_default_overflow_handler((event)->overflow_handler) + +#ifdef CONFIG_BPF_SYSCALL +static inline bool uses_default_overflow_handler(struct perf_event *event) +{ + if (likely(is_default_overflow_handler(event))) + return true; + + return __is_default_overflow_handler(event->orig_overflow_handler); +} +#else +#define uses_default_overflow_handler(event) \ + is_default_overflow_handler(event) +#endif + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, @@ -1556,7 +1598,7 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_proc_update_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -1860,10 +1902,6 @@ extern void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); -#ifdef CONFIG_MMU -extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); -#endif - /* * Snapshot branch stack on software events. * diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5063b482e34f..85fc7554cd52 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -5,6 +5,9 @@ #include <linux/pfn.h> #include <asm/pgtable.h> +#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT) +#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT) + #ifndef __ASSEMBLY__ #ifdef CONFIG_MMU @@ -63,7 +66,6 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } -#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) @@ -99,7 +101,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address))) #define pte_unmap(pte) do { \ kunmap_local((pte)); \ - /* rcu_read_unlock() to be added later */ \ + rcu_read_unlock(); \ } while (0) #else static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) @@ -108,10 +110,12 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) } static inline void pte_unmap(pte_t *pte) { - /* rcu_read_unlock() to be added later */ + rcu_read_unlock(); } #endif +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable); + /* Find an entry in the second-level page table.. */ #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -180,6 +184,101 @@ static inline int pmd_young(pmd_t pmd) } #endif +#ifndef pmd_dirty +static inline int pmd_dirty(pmd_t pmd) +{ + return 0; +} +#endif + +/* + * A facility to provide lazy MMU batching. This allows PTE updates and + * page invalidations to be delayed until a call to leave lazy MMU mode + * is issued. Some architectures may benefit from doing this, and it is + * beneficial for both shadow and direct mode hypervisors, which may batch + * the PTE updates which happen during this window. Note that using this + * interface requires that read hazards be removed from the code. A read + * hazard could result in the direct mode hypervisor case, since the actual + * write to the page tables may not yet have taken place, so reads though + * a raw PTE pointer after it has been modified are not guaranteed to be + * up to date. This mode can only be entered and left under the protection of + * the page table locks for all page tables which may be modified. In the UP + * case, this is required so that preemption is disabled, and in the SMP case, + * it must synchronize the delayed page table writes properly on other CPUs. + */ +#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE +#define arch_enter_lazy_mmu_mode() do {} while (0) +#define arch_leave_lazy_mmu_mode() do {} while (0) +#define arch_flush_lazy_mmu_mode() do {} while (0) +#endif + +#ifndef pte_batch_hint +/** + * pte_batch_hint - Number of pages that can be added to batch without scanning. + * @ptep: Page table pointer for the entry. + * @pte: Page table entry. + * + * Some architectures know that a set of contiguous ptes all map the same + * contiguous memory with the same permissions. In this case, it can provide a + * hint to aid pte batching without the core code needing to scan every pte. + * + * An architecture implementation may ignore the PTE accessed state. Further, + * the dirty state must apply atomically to all the PTEs described by the hint. + * + * May be overridden by the architecture, else pte_batch_hint is always 1. + */ +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ + return 1; +} +#endif + +#ifndef pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) +{ + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); +} +#endif + +#define pte_next_pfn(pte) pte_advance_pfn(pte, 1) + +#ifndef set_ptes +/** + * set_ptes - Map consecutive pages to a contiguous range of addresses. + * @mm: Address space to map the pages into. + * @addr: Address to map the first page at. + * @ptep: Page table pointer for the first entry. + * @pte: Page table entry for the first page. + * @nr: Number of pages to map. + * + * When nr==1, initial state of pte may be present or not present, and new state + * may be present or not present. When nr>1, initial state of all ptes must be + * not present, and new state must be present. + * + * May be overridden by the architecture, or the architecture can define + * set_pte() and PFN_PTE_SHIFT. + * + * Context: The caller holds the page table lock. The pages all belong + * to the same folio. The PTEs are all in the same PMD. + */ +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + page_table_check_ptes_set(mm, ptep, pte, nr); + + arch_enter_lazy_mmu_mode(); + for (;;) { + set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = pte_next_pfn(pte); + } + arch_leave_lazy_mmu_mode(); +} +#endif +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -226,6 +325,27 @@ static inline pmd_t pmdp_get(pmd_t *pmdp) } #endif +#ifndef pudp_get +static inline pud_t pudp_get(pud_t *pudp) +{ + return READ_ONCE(*pudp); +} +#endif + +#ifndef p4dp_get +static inline p4d_t p4dp_get(p4d_t *p4dp) +{ + return READ_ONCE(*p4dp); +} +#endif + +#ifndef pgdp_get +static inline pgd_t pgdp_get(pgd_t *pgdp) +{ + return READ_ONCE(*pgdp); +} +#endif + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, @@ -309,7 +429,21 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void) */ static inline bool arch_has_hw_pte_young(void) { - return false; + return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG); +} +#endif + +#ifndef arch_check_zapped_pte +static inline void arch_check_zapped_pte(struct vm_area_struct *vma, + pte_t pte) +{ +} +#endif + +#ifndef arch_check_zapped_pmd +static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, + pmd_t pmd) +{ } #endif @@ -320,7 +454,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, { pte_t pte = ptep_get(ptep); pte_clear(mm, address, ptep); - page_table_check_pte_clear(mm, address, pte); + page_table_check_pte_clear(mm, pte); return pte; } #endif @@ -390,6 +524,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) return pmd; } #define pmdp_get_lockless pmdp_get_lockless +#define pmdp_get_lockless_sync() tlb_remove_table_sync_one() #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ @@ -408,6 +543,9 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) { return pmdp_get(pmdp); } +static inline void pmdp_get_lockless_sync(void) +{ +} #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -419,7 +557,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, pmd_t pmd = *pmdp; pmd_clear(pmdp); - page_table_check_pmd_clear(mm, address, pmd); + page_table_check_pmd_clear(mm, pmd); return pmd; } @@ -432,7 +570,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, pud_t pud = *pudp; pud_clear(pudp); - page_table_check_pud_clear(mm, address, pud); + page_table_check_pud_clear(mm, pud); return pud; } @@ -450,11 +588,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL -static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, int full) { - return pudp_huge_get_and_clear(mm, address, pudp); + return pudp_huge_get_and_clear(vma->vm_mm, address, pudp); } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -468,6 +606,76 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, } #endif +#ifndef get_and_clear_full_ptes +/** + * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the + * returned PTE. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr, int full) +{ + pte_t pte, tmp_pte; + + pte = ptep_get_and_clear_full(mm, addr, ptep, full); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +#ifndef clear_full_ptes +/** + * clear_full_ptes - Clear present PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + ptep_get_and_clear_full(mm, addr, ptep, full); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif /* * If two threads concurrently fault at the same page, the thread that @@ -515,6 +723,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, pud_t *pudp); #endif +#ifndef pte_mkwrite +static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) +{ + return pte_mkwrite_novma(pte); +} +#endif + +#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite) +static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +{ + return pmd_mkwrite_novma(pmd); +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) @@ -524,6 +746,37 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres } #endif +#ifndef wrprotect_ptes +/** + * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to write-protect. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_set_wrprotect(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + for (;;) { + ptep_set_wrprotect(mm, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + /* * On some architectures hardware does not set page access bit when accessing * memory page, it is responsibility of software setting this bit. It brings @@ -558,6 +811,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { @@ -571,6 +825,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm, { BUILD_BUG(); } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #endif @@ -693,11 +948,14 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { return pmd_val(pmd_a) == pmd_val(pmd_b); } +#endif +#ifndef pud_same static inline int pud_same(pud_t pud_a, pud_t pud_b) { return pud_val(pud_a) == pud_val(pud_b); } +#define pud_same pud_same #endif #ifndef __HAVE_ARCH_P4D_SAME @@ -1041,27 +1299,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #endif /* - * A facility to provide lazy MMU batching. This allows PTE updates and - * page invalidations to be delayed until a call to leave lazy MMU mode - * is issued. Some architectures may benefit from doing this, and it is - * beneficial for both shadow and direct mode hypervisors, which may batch - * the PTE updates which happen during this window. Note that using this - * interface requires that read hazards be removed from the code. A read - * hazard could result in the direct mode hypervisor case, since the actual - * write to the page tables may not yet have taken place, so reads though - * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. - */ -#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) -#endif - -/* * A facility to provide batching of the reload of page tables and * other process state with the actual context switch code for * paravirtualized guests. By convention, only one of the batched @@ -1322,12 +1559,16 @@ static inline int pud_trans_unstable(pud_t *pud) #ifndef CONFIG_NUMA_BALANCING /* - * Technically a PTE can be PROTNONE even when not doing NUMA balancing but - * the only case the kernel cares is for NUMA balancing and is only ever set - * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked - * _PAGE_PROTNONE so by default, implement the helper as "always no". It - * is the responsibility of the caller to distinguish between PROT_NONE - * protections and NUMA hinting fault protections. + * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is + * perfectly valid to indicate "no" in that case, which is why our default + * implementation defaults to "always no". + * + * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE + * page protection due to NUMA hinting. NUMA hinting faults only apply in + * accessible VMAs. + * + * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault, + * looking at the VMA accessibility is sufficient. */ static inline int pte_protnone(pte_t pte) { @@ -1499,6 +1740,9 @@ typedef unsigned int pgtbl_mod_mask; #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif +#ifndef has_transparent_pud_hugepage +#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +#endif /* * On some architectures it depends on the mm if the p4d/pud or pmd * layer of the page table hierarchy is folded or not. @@ -1533,16 +1777,16 @@ typedef unsigned int pgtbl_mod_mask; * Only meaningful when called on a valid entry. */ #ifndef pgd_leaf -#define pgd_leaf(x) 0 +#define pgd_leaf(x) false #endif #ifndef p4d_leaf -#define p4d_leaf(x) 0 +#define p4d_leaf(x) false #endif #ifndef pud_leaf -#define pud_leaf(x) 0 +#define pud_leaf(x) false #endif #ifndef pmd_leaf -#define pmd_leaf(x) 0 +#define pmd_leaf(x) false #endif #ifndef pgd_leaf_size diff --git a/include/linux/phy.h b/include/linux/phy.h index 11c1e91563d4..3f68b8239bb1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -30,6 +30,7 @@ #include <linux/refcount.h> #include <linux/atomic.h> +#include <net/eee.h> #define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) @@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) +#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; @@ -110,6 +113,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax + * @PHY_INTERFACE_MODE_PSGMII: Penta SGMII * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX @@ -147,6 +151,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_XLGMII, PHY_INTERFACE_MODE_MOCA, + PHY_INTERFACE_MODE_PSGMII, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_100BASEX, @@ -254,6 +259,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "xlgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; + case PHY_INTERFACE_MODE_PSGMII: + return "psgmii"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; case PHY_INTERFACE_MODE_TRGMII: @@ -298,6 +305,7 @@ static inline const char *phy_modes(phy_interface_t interface) #define MII_BUS_ID_SIZE 61 struct device; +struct kernel_hwtstamp_config; struct phylink; struct sfp_bus; struct sfp_upstream_ops; @@ -322,7 +330,9 @@ struct mdio_bus_stats { /** * struct phy_package_shared - Shared information in PHY packages - * @addr: Common PHY address used to combine PHYs in one package + * @base_addr: Base PHY address of PHY package used to combine PHYs + * in one package and for offset calculation of phy_package_read/write + * @np: Pointer to the Device Node if PHY package defined in DT * @refcnt: Number of PHYs connected to this shared data * @flags: Initialization of PHY package * @priv_size: Size of the shared private data @priv @@ -333,7 +343,9 @@ struct mdio_bus_stats { * phy_package_leave(). */ struct phy_package_shared { - int addr; + u8 base_addr; + /* With PHY package defined in DT this points to the PHY package node */ + struct device_node *np; refcount_t refcnt; unsigned long flags; size_t priv_size; @@ -563,7 +575,6 @@ struct macsec_ops; * - Bits [31:24] are reserved for defining generic * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) - * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached * @sfp_bus: SFP bus attached to this PHY's fiber port @@ -584,6 +595,8 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @eee_enabled: Flag indicating whether the EEE feature is enabled + * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -600,6 +613,8 @@ struct macsec_ops; * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value + * @possible_interfaces: bitmap if interface modes that the attached PHY + * will switch between depending on media speed. * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics * @ehdr: nNtlink header for cable diagnostics @@ -631,7 +646,7 @@ struct phy_device { /* Information about the PHY type */ /* And management functions */ - struct phy_driver *drv; + const struct phy_driver *drv; struct device_link *devlink; @@ -669,6 +684,7 @@ struct phy_device { u32 dev_flags; phy_interface_t interface; + DECLARE_PHY_INTERFACE_MASK(possible_interfaces); /* * forced speed & duplex (no autoneg) @@ -690,7 +706,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); - /* used for eee validation */ + /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); bool eee_enabled; @@ -700,6 +716,8 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; + bool enable_tx_lpi; + struct eee_config eee_cfg; #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; @@ -844,6 +862,15 @@ struct phy_plca_status { bool pst; }; +/* Modes for PHY LED configuration */ +enum phy_led_modes { + PHY_LED_ACTIVE_LOW = 0, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + + /* keep it last */ + __PHY_LED_MODES_NUM, +}; + /** * struct phy_led: An LED driven by the PHY * @@ -1104,6 +1131,52 @@ struct phy_driver { int (*led_blink_set)(struct phy_device *dev, u8 index, unsigned long *delay_on, unsigned long *delay_off); + /** + * @led_hw_is_supported: Can the HW support the given rules. + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules The core is interested in these rules + * + * Return 0 if yes, -EOPNOTSUPP if not, or an error code. + */ + int (*led_hw_is_supported)(struct phy_device *dev, u8 index, + unsigned long rules); + /** + * @led_hw_control_set: Set the HW to control the LED + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules The rules used to control the LED + * + * Returns 0, or a an error code. + */ + int (*led_hw_control_set)(struct phy_device *dev, u8 index, + unsigned long rules); + /** + * @led_hw_control_get: Get how the HW is controlling the LED + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @rules Pointer to the rules used to control the LED + * + * Set *@rules to how the HW is currently blinking. Returns 0 + * on success, or a error code if the current blinking cannot + * be represented in rules, or some other error happens. + */ + int (*led_hw_control_get)(struct phy_device *dev, u8 index, + unsigned long *rules); + + /** + * @led_polarity_set: Set the LED polarity modes + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @modes: bitmap of LED polarity modes + * + * Configure LED with all the required polarity modes in @modes + * to make it correctly turn ON or OFF. + * + * Returns 0, or an error code. + */ + int (*led_polarity_set)(struct phy_device *dev, int index, + unsigned long modes); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1522,9 +1595,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev) return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp; } -static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) +static inline int phy_hwtstamp(struct phy_device *phydev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack); } static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, @@ -1698,6 +1773,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); @@ -1731,10 +1807,6 @@ int phy_start_cable_test_tdr(struct phy_device *phydev, } #endif -int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result); -int phy_cable_test_fault_length(struct phy_device *phydev, u8 pair, - u16 cm); - static inline void phy_device_reset(struct phy_device *phydev, int value) { mdio_device_reset(&phydev->mdio, value); @@ -1811,7 +1883,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 37 */ int genphy_c37_config_aneg(struct phy_device *phydev); -int genphy_c37_read_status(struct phy_device *phydev); +int genphy_c37_read_status(struct phy_device *phydev, bool *changed); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); @@ -1826,6 +1898,8 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_pma_read_ext_abilities(struct phy_device *phydev); +int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev); int genphy_c45_read_eee_abilities(struct phy_device *phydev); int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); @@ -1844,9 +1918,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev, int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, unsigned long *lp, bool *is_enabled); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); @@ -1896,8 +1970,10 @@ int phy_get_rate_matching(struct phy_device *phydev, void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); +void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_support_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -1924,8 +2000,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); @@ -1934,10 +2010,13 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); -int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size); +int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); +int of_phy_package_join(struct phy_device *phydev, size_t priv_size); void phy_package_leave(struct phy_device *phydev); int devm_phy_package_join(struct device *dev, struct phy_device *phydev, - int addr, size_t priv_size); + int base_addr, size_t priv_size); +int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, + size_t priv_size); int __init mdio_bus_init(void); void mdio_bus_exit(void); @@ -1954,48 +2033,89 @@ int phy_ethtool_set_plca_cfg(struct phy_device *phydev, int phy_ethtool_get_plca_status(struct phy_device *phydev, struct phy_plca_status *plca_st); -static inline int phy_package_read(struct phy_device *phydev, u32 regnum) +int __phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config); +int __phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); + +static inline int phy_package_address(struct phy_device *phydev, + unsigned int addr_offset) { struct phy_package_shared *shared = phydev->shared; + u8 base_addr = shared->base_addr; - if (!shared) + if (addr_offset >= PHY_MAX_ADDR - base_addr) return -EIO; - return mdiobus_read(phydev->mdio.bus, shared->addr, regnum); + /* we know that addr will be in the range 0..31 and thus the + * implicit cast to a signed int is not a problem. + */ + return base_addr + addr_offset; } -static inline int __phy_package_read(struct phy_device *phydev, u32 regnum) +static inline int phy_package_read(struct phy_device *phydev, + unsigned int addr_offset, u32 regnum) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return __mdiobus_read(phydev->mdio.bus, shared->addr, regnum); + return mdiobus_read(phydev->mdio.bus, addr, regnum); +} + +static inline int __phy_package_read(struct phy_device *phydev, + unsigned int addr_offset, u32 regnum) +{ + int addr = phy_package_address(phydev, addr_offset); + + if (addr < 0) + return addr; + + return __mdiobus_read(phydev->mdio.bus, addr, regnum); } static inline int phy_package_write(struct phy_device *phydev, - u32 regnum, u16 val) + unsigned int addr_offset, u32 regnum, + u16 val) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); + return mdiobus_write(phydev->mdio.bus, addr, regnum, val); } static inline int __phy_package_write(struct phy_device *phydev, - u32 regnum, u16 val) + unsigned int addr_offset, u32 regnum, + u16 val) { - struct phy_package_shared *shared = phydev->shared; + int addr = phy_package_address(phydev, addr_offset); - if (!shared) - return -EIO; + if (addr < 0) + return addr; - return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); + return __mdiobus_write(phydev->mdio.bus, addr, regnum, val); } +int __phy_package_read_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum); + +int phy_package_read_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum); + +int __phy_package_write_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum, u16 val); + +int phy_package_write_mmd(struct phy_device *phydev, + unsigned int addr_offset, int devad, + u32 regnum, u16 val); + static inline bool __phy_package_set_once(struct phy_device *phydev, unsigned int b) { @@ -2017,7 +2137,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev) return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); } -extern struct bus_type mdio_bus_type; +extern const struct bus_type mdio_bus_type; struct mdio_board_info { const char *bus_id; diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index f6d607ef0e80..03cd5bae92d3 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -122,6 +122,11 @@ struct phy_ops { union phy_configure_opts *opts); int (*reset)(struct phy *phy); int (*calibrate)(struct phy *phy); + + /* notify phy connect status change */ + int (*connect)(struct phy *phy, int port); + int (*disconnect)(struct phy *phy, int port); + void (*release)(struct phy *phy); struct module *owner; }; @@ -176,7 +181,7 @@ struct phy_provider { struct module *owner; struct list_head list; struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); }; /** @@ -243,6 +248,8 @@ static inline enum phy_mode phy_get_mode(struct phy *phy) } int phy_reset(struct phy *phy); int phy_calibrate(struct phy *phy); +int phy_notify_connect(struct phy *phy, int port); +int phy_notify_disconnect(struct phy *phy, int port); static inline int phy_get_bus_width(struct phy *phy) { return phy->attrs.bus_width; @@ -265,7 +272,7 @@ void phy_put(struct device *dev, struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); struct phy *phy_create(struct device *dev, struct device_node *node, const struct phy_ops *ops); struct phy *devm_phy_create(struct device *dev, struct device_node *node, @@ -275,11 +282,11 @@ void devm_phy_destroy(struct device *dev, struct phy *phy); struct phy_provider *__of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void of_phy_provider_unregister(struct phy_provider *phy_provider); void devm_of_phy_provider_unregister(struct device *dev, struct phy_provider *phy_provider); @@ -396,6 +403,20 @@ static inline int phy_calibrate(struct phy *phy) return -ENOSYS; } +static inline int phy_notify_connect(struct phy *phy, int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + +static inline int phy_notify_disconnect(struct phy *phy, int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_configure(struct phy *phy, union phy_configure_opts *opts) { @@ -479,7 +500,7 @@ static inline struct phy *of_phy_get(struct device_node *np, const char *con_id) } static inline struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { return ERR_PTR(-ENOSYS); } @@ -509,7 +530,7 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy) static inline struct phy_provider *__of_phy_provider_register( struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } @@ -517,7 +538,7 @@ static inline struct phy_provider *__of_phy_provider_register( static inline struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fd..6ca51e0080ec 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); diff --git a/include/linux/phylib_stubs.h b/include/linux/phylib_stubs.h new file mode 100644 index 000000000000..1279f48c8a70 --- /dev/null +++ b/include/linux/phylib_stubs.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Stubs for the Network PHY library + */ + +#include <linux/rtnetlink.h> + +struct kernel_hwtstamp_config; +struct netlink_ext_ack; +struct phy_device; + +#if IS_ENABLED(CONFIG_PHYLIB) + +extern const struct phylib_stubs *phylib_stubs; + +struct phylib_stubs { + int (*hwtstamp_get)(struct phy_device *phydev, + struct kernel_hwtstamp_config *config); + int (*hwtstamp_set)(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); +}; + +static inline int phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config) +{ + /* phylib_register_stubs() and phylib_unregister_stubs() + * also run under rtnl_lock(). + */ + ASSERT_RTNL(); + + if (!phylib_stubs) + return -EOPNOTSUPP; + + return phylib_stubs->hwtstamp_get(phydev, config); +} + +static inline int phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + /* phylib_register_stubs() and phylib_unregister_stubs() + * also run under rtnl_lock(). + */ + ASSERT_RTNL(); + + if (!phylib_stubs) + return -EOPNOTSUPP; + + return phylib_stubs->hwtstamp_set(phydev, config, extack); +} + +#else + +static inline int phy_hwtstamp_get(struct phy_device *phydev, + struct kernel_hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int phy_hwtstamp_set(struct phy_device *phydev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +#endif diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1817940a3418..9a57deefcb07 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -9,6 +9,7 @@ struct device_node; struct ethtool_cmd; struct fwnode_handle; struct net_device; +struct phylink; enum { MLO_PAUSE_NONE, @@ -98,72 +99,6 @@ static inline bool phylink_autoneg_inband(unsigned int mode) } /** - * phylink_pcs_neg_mode() - helper to determine PCS inband mode - * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. - * @interface: interface mode to be used - * @advertising: adertisement ethtool link mode mask - * - * Determines the negotiation mode to be used by the PCS, and returns - * one of: - * - * - %PHYLINK_PCS_NEG_NONE: interface mode does not support inband - * - %PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY) - * will be used. - * - %PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg - * disabled - * - %PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled - * - * Note: this is for cases where the PCS itself is involved in negotiation - * (e.g. Clause 37, SGMII and similar) not Clause 73. - */ -static inline unsigned int phylink_pcs_neg_mode(unsigned int mode, - phy_interface_t interface, - const unsigned long *advertising) -{ - unsigned int neg_mode; - - switch (interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_USXGMII: - /* These protocols are designed for use with a PHY which - * communicates its negotiation result back to the MAC via - * inband communication. Note: there exist PHYs that run - * with SGMII but do not send the inband data. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - break; - - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* 1000base-X is designed for use media-side for Fibre - * connections, and thus the Autoneg bit needs to be - * taken into account. We also do this for 2500base-X - * as well, but drivers may not support this, so may - * need to override this. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising)) - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; - break; - - default: - neg_mode = PHYLINK_PCS_NEG_NONE; - break; - } - - return neg_mode; -} - -/** * struct phylink_link_state - link state structure * @advertising: ethtool bitmask containing advertised link modes * @lp_advertising: ethtool bitmask containing link partner advertised link @@ -200,8 +135,6 @@ enum phylink_op_type { * struct phylink_config - PHYLINK configuration structure * @dev: a pointer to a struct device associated with the MAC * @type: operation type of PHYLINK instance - * @legacy_pre_march2020: driver has not been updated for March 2020 updates - * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. @@ -215,7 +148,6 @@ enum phylink_op_type { struct phylink_config { struct device *dev; enum phylink_op_type type; - bool legacy_pre_march2020; bool poll_fixed_state; bool mac_managed_pm; bool ovr_an_inband; @@ -225,35 +157,31 @@ struct phylink_config { unsigned long mac_capabilities; }; +void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); + /** * struct phylink_mac_ops - MAC operations structure. - * @validate: Validate and update the link configuration. + * @mac_get_caps: Get MAC capabilities for interface mode. * @mac_select_pcs: Select a PCS for the interface mode. - * @mac_pcs_get_state: Read the current link state from the hardware. * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. * @mac_finish: finish a major reconfiguration of the interface. - * @mac_an_restart: restart 802.3z BaseX autonegotiation. * @mac_link_down: take the link down. * @mac_link_up: allow the link to come up. * * The individual methods are described more fully below. */ struct phylink_mac_ops { - void (*validate)(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); + unsigned long (*mac_get_caps)(struct phylink_config *config, + phy_interface_t interface); struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, phy_interface_t interface); - void (*mac_pcs_get_state)(struct phylink_config *config, - struct phylink_link_state *state); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); int (*mac_finish)(struct phylink_config *config, unsigned int mode, phy_interface_t iface); - void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); void (*mac_link_up)(struct phylink_config *config, @@ -264,39 +192,17 @@ struct phylink_mac_ops { #if 0 /* For kernel-doc purposes only. */ /** - * validate - Validate and update the link configuration + * mac_get_caps: Get MAC capabilities for interface mode. * @config: a pointer to a &struct phylink_config. - * @supported: ethtool bitmask for supported link modes. - * @state: a pointer to a &struct phylink_link_state. - * - * Clear bits in the @supported and @state->advertising masks that - * are not supportable by the MAC. - * - * Note that the PHY may be able to transform from one connection - * technology to another, so, eg, don't clear 1000BaseX just - * because the MAC is unable to BaseX mode. This is more about - * clearing unsupported speeds and duplex settings. The port modes - * should not be cleared; phylink_set_port_modes() will help with this. + * @interface: PHY interface mode. * - * When @config->supported_interfaces has been set, phylink will iterate - * over the supported interfaces to determine the full capability of the - * MAC. The validation function must not print errors if @state->interface - * is set to an unexpected value. - * - * When @config->supported_interfaces is empty, phylink will call this - * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and - * expects the MAC driver to return all supported link modes. - * - * If the @state->interface mode is not supported, then the @supported - * mask must be cleared. - * - * This member is optional; if not set, the generic validator will be - * used making use of @config->mac_capabilities and - * @config->supported_interfaces to determine which link modes are - * supported. + * Optional method. When not provided, config->mac_capabilities will be used. + * When implemented, this returns the MAC capabilities for the specified + * interface mode where there is some special handling required by the MAC + * driver (e.g. not supporting half-duplex in certain interface modes.) */ -void validate(struct phylink_config *config, unsigned long *supported, - struct phylink_link_state *state); +unsigned long mac_get_caps(struct phylink_config *config, + phy_interface_t interface); /** * mac_select_pcs: Select a PCS for the interface mode. * @config: a pointer to a &struct phylink_config. @@ -314,25 +220,6 @@ struct phylink_pcs *mac_select_pcs(struct phylink_config *config, phy_interface_t interface); /** - * mac_pcs_get_state() - Read the current inband link state from the hardware - * @config: a pointer to a &struct phylink_config. - * @state: a pointer to a &struct phylink_link_state. - * - * Read the current inband link state from the MAC PCS, reporting the - * current speed in @state->speed, duplex mode in @state->duplex, pause - * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, - * negotiation completion state in @state->an_complete, and link up state - * in @state->link. If possible, @state->lp_advertising should also be - * populated. - * - * Note: This is a legacy method. This function will not be called unless - * legacy_pre_march2020 is set in &struct phylink_config and there is no - * PCS attached. - */ -void mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state); - -/** * mac_prepare() - prepare to change the PHY interface mode * @config: a pointer to a &struct phylink_config. * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. @@ -368,17 +255,9 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * - * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set - * in their &phylnk_config and which don't have a PCS), this function will be - * called on each link up event, and to also change the in-band advert. For - * non-legacy drivers, it will only be called to reconfigure the MAC for a - * "major" change in e.g. interface mode. It will not be called for changes - * in speed, duplex or pause modes or to change the in-band advertisement. - * In any case, it is strongly preferred that speed, duplex and pause settings - * are handled in the mac_link_up() method and not in this method. - * - * (this requires a rewrite - please refer to mac_link_up() for situations - * where the PCS and MAC are not tightly integrated.) + * This will only be called to reconfigure the MAC for a "major" change in + * e.g. interface mode. It will not be called for changes in speed, duplex + * or pause modes or to change the in-band advertisement. * * In all negotiation modes, as defined by @mode, @state->pause indicates the * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not @@ -410,7 +289,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * 1000base-X or Cisco SGMII mode depending on the @state->interface * mode). In both cases, link state management (whether the link * is up or not) is performed by the MAC, and reported via the - * mac_pcs_get_state() callback. Changes in link state must be made + * pcs_get_state() callback. Changes in link state must be made * by calling phylink_mac_change(). * * Interface mode specific details are mentioned below. @@ -459,16 +338,6 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_an_restart() - restart 802.3z BaseX autonegotiation - * @config: a pointer to a &struct phylink_config. - * - * Note: This is a legacy method. This function will not be called unless - * legacy_pre_march2020 is set in &struct phylink_config and there is no - * PCS attached. - */ -void mac_an_restart(struct phylink_config *config); - -/** * mac_link_down() - take the link down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode @@ -520,14 +389,19 @@ struct phylink_pcs_ops; /** * struct phylink_pcs - PHYLINK PCS instance * @ops: a pointer to the &struct phylink_pcs_ops structure + * @phylink: pointer to &struct phylink_config * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes * * This structure is designed to be embedded within the PCS private data, * and will be passed between phylink and the PCS. + * + * The @phylink member is private to phylink and must not be touched by + * the PCS driver. */ struct phylink_pcs { const struct phylink_pcs_ops *ops; + struct phylink *phylink; bool neg_mode; bool poll; }; @@ -535,6 +409,10 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_enable: enable the PCS. + * @pcs_disable: disable the PCS. + * @pcs_pre_config: pre-mac_config method (for errata) + * @pcs_post_config: post-mac_config method (for arrata) * @pcs_get_state: read the current MAC PCS link state from the hardware. * @pcs_config: configure the MAC PCS for the selected mode and state. * @pcs_an_restart: restart 802.3z BaseX autonegotiation. @@ -544,6 +422,12 @@ struct phylink_pcs { struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + int (*pcs_enable)(struct phylink_pcs *pcs); + void (*pcs_disable)(struct phylink_pcs *pcs); + void (*pcs_pre_config)(struct phylink_pcs *pcs, + phy_interface_t interface); + int (*pcs_post_config)(struct phylink_pcs *pcs, + phy_interface_t interface); void (*pcs_get_state)(struct phylink_pcs *pcs, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode, @@ -574,6 +458,18 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); /** + * pcs_enable() - enable the PCS. + * @pcs: a pointer to a &struct phylink_pcs. + */ +int pcs_enable(struct phylink_pcs *pcs); + +/** + * pcs_disable() - disable the PCS. + * @pcs: a pointer to a &struct phylink_pcs. + */ +void pcs_disable(struct phylink_pcs *pcs); + +/** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. * @state: a pointer to a &struct phylink_link_state. @@ -584,9 +480,6 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. - * - * When present, this overrides mac_pcs_get_state() in &struct - * phylink_mac_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); @@ -615,7 +508,7 @@ void pcs_get_state(struct phylink_pcs *pcs, * * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -645,23 +538,12 @@ void pcs_an_restart(struct phylink_pcs *pcs); * * The %mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); #endif -void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); -unsigned long phylink_get_capabilities(phy_interface_t interface, - unsigned long mac_capabilities, - int rate_matching); -void phylink_validate_mask_caps(unsigned long *supported, - struct phylink_link_state *state, - unsigned long caps); -void phylink_generic_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); - struct phylink *phylink_create(struct phylink_config *, const struct fwnode_handle *, phy_interface_t, @@ -677,6 +559,7 @@ int phylink_fwnode_phy_connect(struct phylink *pl, void phylink_disconnect_phy(struct phylink *); void phylink_mac_change(struct phylink *, bool up); +void phylink_pcs_change(struct phylink_pcs *, bool up); void phylink_start(struct phylink *); void phylink_stop(struct phylink *); @@ -698,8 +581,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); int phylink_init_eee(struct phylink *, bool); -int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); +int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); int phylink_speed_down(struct phylink *pl, bool sync); int phylink_speed_up(struct phylink *pl); diff --git a/include/linux/pid.h b/include/linux/pid.h index 653a527574c4..a3aad9b4074c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,18 +2,12 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H +#include <linux/pid_types.h> #include <linux/rculist.h> -#include <linux/wait.h> +#include <linux/rcupdate.h> #include <linux/refcount.h> - -enum pid_type -{ - PIDTYPE_PID, - PIDTYPE_TGID, - PIDTYPE_PGID, - PIDTYPE_SID, - PIDTYPE_MAX, -}; +#include <linux/sched.h> +#include <linux/wait.h> /* * What is struct pid? @@ -51,6 +45,8 @@ enum pid_type * find_pid_ns() using the int nr and struct pid_namespace *ns. */ +#define RESERVED_PIDS 300 + struct upid { int nr; struct pid_namespace *ns; @@ -61,6 +57,8 @@ struct pid refcount_t count; unsigned int level; spinlock_t lock; + struct dentry *stashed; + u64 ino; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -72,15 +70,13 @@ struct pid extern struct pid init_struct_pid; -extern const struct file_operations pidfd_fops; - struct file; -extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_create(struct pid *pid, unsigned int flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { @@ -110,9 +106,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -struct pid_namespace; -extern struct pid_namespace init_pid_ns; - extern int pid_max; extern int pid_max_min, pid_max_max; @@ -215,4 +208,127 @@ pid_t pid_vnr(struct pid *pid); } \ task = tg___; \ } while_each_pid_task(pid, type, task) + +static inline struct pid *task_pid(struct task_struct *task) +{ + return task->thread_pid; +} + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of + * current. + * task_xid_nr_ns() : id seen from the ns specified; + * + * see also pid_nr() etc in include/linux/pid.h + */ +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); + +static inline pid_t task_pid_nr(struct task_struct *tsk) +{ + return tsk->pid; +} + +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); +} + +static inline pid_t task_pid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); +} + + +static inline pid_t task_tgid_nr(struct task_struct *tsk) +{ + return tsk->tgid; +} + +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->thread_pid != NULL; +} + +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); +} + +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); +} + + +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); +} + +static inline pid_t task_session_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); +} + +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + +/* Obsolete, do not use: */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + +/** + * is_global_init - check if a task structure is init. Since init + * is free to have sub-threads we need to check tgid. + * @tsk: Task structure to be checked. + * + * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. + */ +static inline int is_global_init(struct task_struct *tsk) +{ + return task_tgid_nr(tsk) == 1; +} + #endif /* _LINUX_PID_H */ diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c758809d5bcf..f9f9931e02d6 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -17,18 +17,10 @@ struct fs_pin; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) -/* - * sysctl for vm.memfd_noexec - * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL - * acts like MFD_EXEC was set. - * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL - * acts like MFD_NOEXEC_SEAL was set. - * 2: memfd_create() without MFD_NOEXEC_SEAL will be - * rejected. - */ -#define MEMFD_NOEXEC_SCOPE_EXEC 0 -#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 -#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 +/* modes for vm.memfd_noexec sysctl */ +#define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */ +#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */ +#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */ #endif struct pid_namespace { @@ -47,7 +39,6 @@ struct pid_namespace { int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) - /* sysctl for vm.memfd_noexec */ int memfd_noexec_scope; #endif } __randomize_layout; @@ -64,6 +55,23 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + int scope = MEMFD_NOEXEC_SCOPE_EXEC; + + for (; ns; ns = ns->parent) + scope = max(scope, READ_ONCE(ns->memfd_noexec_scope)); + + return scope; +} +#else +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + return 0; +} +#endif + extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); @@ -78,6 +86,11 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } +static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) +{ + return 0; +} + static inline struct pid_namespace *copy_pid_ns(unsigned long flags, struct user_namespace *user_ns, struct pid_namespace *ns) { diff --git a/include/linux/pid_types.h b/include/linux/pid_types.h new file mode 100644 index 000000000000..c2aee1d91dcf --- /dev/null +++ b/include/linux/pid_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_TYPES_H +#define _LINUX_PID_TYPES_H + +enum pid_type { + PIDTYPE_PID, + PIDTYPE_TGID, + PIDTYPE_PGID, + PIDTYPE_SID, + PIDTYPE_MAX, +}; + +struct pid_namespace; +extern struct pid_namespace init_pid_ns; + +#endif /* _LINUX_PID_TYPES_H */ diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h new file mode 100644 index 000000000000..75bdf9807802 --- /dev/null +++ b/include/linux/pidfs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_FS_H +#define _LINUX_PID_FS_H + +struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); +void __init pidfs_init(void); + +#endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 4729d54e8995..73de70362b98 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -17,6 +17,7 @@ #include <linux/pinctrl/pinctrl-state.h> struct device; +struct gpio_chip; /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; @@ -25,27 +26,30 @@ struct pinctrl_state; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern bool pinctrl_gpio_can_use_line(unsigned gpio); -extern int pinctrl_gpio_request(unsigned gpio); -extern void pinctrl_gpio_free(unsigned gpio); -extern int pinctrl_gpio_direction_input(unsigned gpio); -extern int pinctrl_gpio_direction_output(unsigned gpio); -extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); - -extern struct pinctrl * __must_check pinctrl_get(struct device *dev); -extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, - const char *name); -extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); - -extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); -extern void devm_pinctrl_put(struct pinctrl *p); -extern int pinctrl_select_default_state(struct device *dev); +bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset); +void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset); +int pinctrl_gpio_direction_input(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_direction_output(struct gpio_chip *gc, + unsigned int offset); +int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config); + +struct pinctrl * __must_check pinctrl_get(struct device *dev); +void pinctrl_put(struct pinctrl *p); +struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); +int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); + +struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); +void devm_pinctrl_put(struct pinctrl *p); +int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM -extern int pinctrl_pm_select_default_state(struct device *dev); -extern int pinctrl_pm_select_sleep_state(struct device *dev); -extern int pinctrl_pm_select_idle_state(struct device *dev); +int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_sleep_state(struct device *dev); +int pinctrl_pm_select_idle_state(struct device *dev); #else static inline int pinctrl_pm_select_default_state(struct device *dev) { @@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline bool pinctrl_gpio_can_use_line(unsigned gpio) +static inline bool +pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset) { return true; } -static inline int pinctrl_gpio_request(unsigned gpio) +static inline int +pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline void pinctrl_gpio_free(unsigned gpio) +static inline void +pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset) { } -static inline int pinctrl_gpio_direction_input(unsigned gpio) +static inline int +pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_direction_output(unsigned gpio) +static inline int +pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset) { return 0; } -static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +static inline int +pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { return 0; } diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0639b36f43c5..673e96df453b 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include <linux/kernel.h> /* ARRAY_SIZE() */ +#include <linux/array_size.h> #include <linux/pinctrl/pinctrl-state.h> @@ -47,7 +47,7 @@ struct pinctrl_map_mux { struct pinctrl_map_configs { const char *group_or_pin; unsigned long *configs; - unsigned num_configs; + unsigned int num_configs; }; /** @@ -154,13 +154,13 @@ struct pinctrl_map; #ifdef CONFIG_PINCTRL extern int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps); + unsigned int num_maps); extern void pinctrl_unregister_mappings(const struct pinctrl_map *map); extern void pinctrl_provide_dummies(void); #else static inline int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps) + unsigned int num_maps) { return 0; } diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index d74b7a4ea154..a65d3d078e58 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -193,17 +193,17 @@ struct pinconf_generic_params { int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev, struct device_node *np, struct pinctrl_map **map, - unsigned *reserved_maps, unsigned *num_maps, + unsigned int *reserved_maps, unsigned int *num_maps, enum pinctrl_map_type type); int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps, enum pinctrl_map_type type); + unsigned int *num_maps, enum pinctrl_map_type type); void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev, - struct pinctrl_map *map, unsigned num_maps); + struct pinctrl_map *map, unsigned int num_maps); static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps) + unsigned int *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_GROUP); @@ -211,7 +211,7 @@ static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctld static inline int pinconf_generic_dt_node_to_map_pin(struct pinctrl_dev *pctldev, struct device_node *np_config, struct pinctrl_map **map, - unsigned *num_maps) + unsigned int *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_PIN); diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h index f8a8215e9021..770ec2221156 100644 --- a/include/linux/pinctrl/pinconf.h +++ b/include/linux/pinctrl/pinconf.h @@ -40,25 +40,25 @@ struct pinconf_ops { bool is_generic; #endif int (*pin_config_get) (struct pinctrl_dev *pctldev, - unsigned pin, + unsigned int pin, unsigned long *config); int (*pin_config_set) (struct pinctrl_dev *pctldev, - unsigned pin, + unsigned int pin, unsigned long *configs, - unsigned num_configs); + unsigned int num_configs); int (*pin_config_group_get) (struct pinctrl_dev *pctldev, - unsigned selector, + unsigned int selector, unsigned long *config); int (*pin_config_group_set) (struct pinctrl_dev *pctldev, - unsigned selector, + unsigned int selector, unsigned long *configs, - unsigned num_configs); + unsigned int num_configs); void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned offset); + unsigned int offset); void (*pin_config_group_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned selector); + unsigned int selector); void (*pin_config_config_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, unsigned long config); diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 4d252ea00ed1..9a8189ffd0f2 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -54,7 +54,7 @@ struct pingroup { * @drv_data: driver-defined per-pin data. pinctrl core does not touch this */ struct pinctrl_pin_desc { - unsigned number; + unsigned int number; const char *name; void *drv_data; }; @@ -82,7 +82,7 @@ struct pinctrl_gpio_range { unsigned int base; unsigned int pin_base; unsigned int npins; - unsigned const *pins; + unsigned int const *pins; struct gpio_chip *gc; }; @@ -108,18 +108,18 @@ struct pinctrl_gpio_range { struct pinctrl_ops { int (*get_groups_count) (struct pinctrl_dev *pctldev); const char *(*get_group_name) (struct pinctrl_dev *pctldev, - unsigned selector); + unsigned int selector); int (*get_group_pins) (struct pinctrl_dev *pctldev, - unsigned selector, - const unsigned **pins, - unsigned *num_pins); + unsigned int selector, + const unsigned int **pins, + unsigned int *num_pins); void (*pin_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s, - unsigned offset); + unsigned int offset); int (*dt_node_to_map) (struct pinctrl_dev *pctldev, struct device_node *np_config, - struct pinctrl_map **map, unsigned *num_maps); + struct pinctrl_map **map, unsigned int *num_maps); void (*dt_free_map) (struct pinctrl_dev *pctldev, - struct pinctrl_map *map, unsigned num_maps); + struct pinctrl_map *map, unsigned int num_maps); }; /** @@ -193,7 +193,7 @@ extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); extern void pinctrl_add_gpio_ranges(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *ranges, - unsigned nranges); + unsigned int nranges); extern void pinctrl_remove_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); @@ -203,8 +203,8 @@ extern struct pinctrl_gpio_range * pinctrl_find_gpio_range_from_pin(struct pinctrl_dev *pctldev, unsigned int pin); extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, - const char *pin_group, const unsigned **pins, - unsigned *num_pins); + const char *pin_group, const unsigned int **pins, + unsigned int *num_pins); /** * struct pinfunction - Description about a function diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index a7e370965c53..d6f7b58d6ad0 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -57,26 +57,26 @@ struct pinctrl_gpio_range; * the pin request. */ struct pinmux_ops { - int (*request) (struct pinctrl_dev *pctldev, unsigned offset); - int (*free) (struct pinctrl_dev *pctldev, unsigned offset); + int (*request) (struct pinctrl_dev *pctldev, unsigned int offset); + int (*free) (struct pinctrl_dev *pctldev, unsigned int offset); int (*get_functions_count) (struct pinctrl_dev *pctldev); const char *(*get_function_name) (struct pinctrl_dev *pctldev, - unsigned selector); + unsigned int selector); int (*get_function_groups) (struct pinctrl_dev *pctldev, - unsigned selector, - const char * const **groups, - unsigned *num_groups); - int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); + unsigned int selector, + const char * const **groups, + unsigned int *num_groups); + int (*set_mux) (struct pinctrl_dev *pctldev, unsigned int func_selector, + unsigned int group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset); + unsigned int offset); void (*gpio_disable_free) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset); + unsigned int offset); int (*gpio_set_direction) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, - unsigned offset, + unsigned int offset, bool input); bool strict; }; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 02e0086b10f6..8ff23bf5a819 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -62,9 +62,6 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; -#ifdef CONFIG_WATCH_QUEUE - bool note_loss; -#endif unsigned int nr_accounted; unsigned int readers; unsigned int writers; @@ -72,6 +69,9 @@ struct pipe_inode_info { unsigned int r_counter; unsigned int w_counter; bool poll_usage; +#ifdef CONFIG_WATCH_QUEUE + bool note_loss; +#endif struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; @@ -125,6 +125,22 @@ struct pipe_buf_operations { }; /** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + +/** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer @@ -269,10 +285,10 @@ bool pipe_is_unprivileged_user(void); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); -long pipe_fcntl(struct file *, unsigned int, unsigned long arg); +long pipe_fcntl(struct file *, unsigned int, unsigned int arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); -unsigned int round_pipe_size(unsigned long size); +unsigned int round_pipe_size(unsigned int size); #endif diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 80cb00db42a4..2f1b952d596a 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,7 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct block_device *bdev; /* dev attached */ + struct file *bdev_file; /* dev attached */ + /* handle acquired for bdev during pkt_open_dev() */ + struct file *f_open_bdev; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; diff --git a/include/linux/platform_data/bd6107.h b/include/linux/platform_data/bd6107.h index 54a06a4d2618..596ca4f95cfa 100644 --- a/include/linux/platform_data/bd6107.h +++ b/include/linux/platform_data/bd6107.h @@ -8,7 +8,7 @@ struct device; struct bd6107_platform_data { - struct device *fbdev; + struct device *dev; unsigned int def_value; }; diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index f922a192fe58..ec99b7b73d1d 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 201 Broadcom Corporation + * Copyright (c) 2016 Broadcom Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ab721cf13a98..ecc47d5fe239 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3961,60 +3961,52 @@ struct ec_response_i2c_passthru { } __ec_align1; /*****************************************************************************/ -/* Power button hang detect */ - +/* AP hang detect */ #define EC_CMD_HANG_DETECT 0x009F -/* Reasons to start hang detection timer */ -/* Power button pressed */ -#define EC_HANG_START_ON_POWER_PRESS BIT(0) - -/* Lid closed */ -#define EC_HANG_START_ON_LID_CLOSE BIT(1) - - /* Lid opened */ -#define EC_HANG_START_ON_LID_OPEN BIT(2) +#define EC_HANG_DETECT_MIN_TIMEOUT 5 +#define EC_HANG_DETECT_MAX_TIMEOUT 65535 -/* Start of AP S3->S0 transition (booting or resuming from suspend) */ -#define EC_HANG_START_ON_RESUME BIT(3) +/* EC hang detect commands */ +enum ec_hang_detect_cmds { + /* Reload AP hang detect timer. */ + EC_HANG_DETECT_CMD_RELOAD = 0x0, -/* Reasons to cancel hang detection */ + /* Stop AP hang detect timer. */ + EC_HANG_DETECT_CMD_CANCEL = 0x1, -/* Power button released */ -#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8) - -/* Any host command from AP received */ -#define EC_HANG_STOP_ON_HOST_COMMAND BIT(9) - -/* Stop on end of AP S0->S3 transition (suspending or shutting down) */ -#define EC_HANG_STOP_ON_SUSPEND BIT(10) + /* Configure watchdog with given reboot timeout and + * cancel currently running AP hang detect timer. + */ + EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2, -/* - * If this flag is set, all the other fields are ignored, and the hang detect - * timer is started. This provides the AP a way to start the hang timer - * without reconfiguring any of the other hang detect settings. Note that - * you must previously have configured the timeouts. - */ -#define EC_HANG_START_NOW BIT(30) + /* Get last hang status - whether the AP boot was clear or not */ + EC_HANG_DETECT_CMD_GET_STATUS = 0x3, -/* - * If this flag is set, all the other fields are ignored (including - * EC_HANG_START_NOW). This provides the AP a way to stop the hang timer - * without reconfiguring any of the other hang detect settings. - */ -#define EC_HANG_STOP_NOW BIT(31) + /* Clear last hang status. Called when AP is rebooting/shutting down + * gracefully. + */ + EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4 +}; struct ec_params_hang_detect { - /* Flags; see EC_HANG_* */ - uint32_t flags; - - /* Timeout in msec before generating host event, if enabled */ - uint16_t host_event_timeout_msec; + uint16_t command; /* enum ec_hang_detect_cmds */ + /* Timeout in seconds before generating reboot */ + uint16_t reboot_timeout_sec; +} __ec_align2; - /* Timeout in msec before generating warm reboot, if enabled */ - uint16_t warm_reboot_timeout_msec; -} __ec_align4; +/* Status codes that describe whether AP has boot normally or the hang has been + * detected and EC has reset AP + */ +enum ec_hang_detect_status { + EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0, + EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1, + EC_HANG_DETECT_AP_BOOT_COUNT, +}; +struct ec_response_hang_detect { + uint8_t status; /* enum ec_hang_detect_status */ +} __ec_align1; /*****************************************************************************/ /* Commands for battery charging */ @@ -4436,8 +4428,20 @@ struct ec_response_i2c_passthru_protect { * These commands are for sending and receiving message via HDMI CEC */ +#define EC_CEC_MAX_PORTS 16 + #define MAX_CEC_MSG_LEN 16 +/* + * Helper macros for packing/unpacking cec_events. + * bits[27:0] : bitmask of events from enum mkbp_cec_event + * bits[31:28]: port number + */ +#define EC_MKBP_EVENT_CEC_PACK(events, port) \ + (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28)) +#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0)) +#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf) + /* CEC message from the AP to be written on the CEC bus */ #define EC_CMD_CEC_WRITE_MSG 0x00B8 @@ -4449,19 +4453,54 @@ struct ec_params_cec_write { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/** + * struct ec_params_cec_write_v1 - Message to write to the CEC bus + * @port: CEC port to write the message on + * @msg_len: length of msg in bytes + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write_v1 { + uint8_t port; + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + +/* CEC message read from a CEC bus reported back to the AP */ +#define EC_CMD_CEC_READ_MSG 0x00B9 + +/** + * struct ec_params_cec_read - Read a message from the CEC bus + * @port: CEC port to read a message on + */ +struct ec_params_cec_read { + uint8_t port; +} __ec_align1; + +/** + * struct ec_response_cec_read - Message read from the CEC bus + * @msg_len: length of msg in bytes + * @msg: message content read from the CEC bus + */ +struct ec_response_cec_read { + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA /** * struct ec_params_cec_set - CEC parameters set * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to set the parameter on * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC * or 1 to enable CEC functionality, in case cmd is * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical * address between 0 and 15 or 0xff to unregister */ struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; uint8_t val; } __ec_align1; @@ -4471,9 +4510,11 @@ struct ec_params_cec_set { /** * struct ec_params_cec_get - CEC parameters get * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to get the parameter on */ struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; } __ec_align1; /** @@ -4487,6 +4528,17 @@ struct ec_response_cec_get { uint8_t val; } __ec_align1; +/* Get the number of CEC ports */ +#define EC_CMD_CEC_PORT_COUNT 0x00C1 + +/** + * struct ec_response_cec_port_count - CEC port count response + * @port_count: number of CEC ports + */ +struct ec_response_cec_port_count { + uint8_t port_count; +} __ec_align1; + /* CEC parameters command */ enum cec_command { /* CEC reading, writing and events enable */ @@ -4501,6 +4553,8 @@ enum mkbp_cec_event { EC_MKBP_CEC_SEND_OK = BIT(0), /* Outgoing message was not acknowledged */ EC_MKBP_CEC_SEND_FAILED = BIT(1), + /* Incoming message can be read out by AP */ + EC_MKBP_CEC_HAVE_DATA = BIT(2), }; /*****************************************************************************/ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4f9f756bc17c..8865e350c12a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -258,7 +258,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); /** diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index f377817ce75c..cdd8cfb424f5 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -144,9 +144,6 @@ #define OMAP_MAX_GPIO_LINES 192 -#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) -#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) - #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; diff --git a/include/linux/platform_data/gpio_backlight.h b/include/linux/platform_data/gpio_backlight.h index 1a8b5b1946fe..323fbf5f7613 100644 --- a/include/linux/platform_data/gpio_backlight.h +++ b/include/linux/platform_data/gpio_backlight.h @@ -8,7 +8,7 @@ struct device; struct gpio_backlight_platform_data { - struct device *fbdev; + struct device *dev; }; #endif diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index f2781aa7eff8..70e8a6bec0f6 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data { unsigned int resolution; unsigned int vreference; unsigned int fan_base; - struct gsc_hwmon_channel channels[]; + struct gsc_hwmon_channel channels[] __counted_by(nchannels); }; #endif diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h index 6a000df5541f..8748680e9e3c 100644 --- a/include/linux/platform_data/hirschmann-hellcreek.h +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Hirschmann Hellcreek TSN switch platform data. * diff --git a/include/linux/platform_data/i2c-mux-reg.h b/include/linux/platform_data/i2c-mux-reg.h index 2543c2a1c9ae..e2e895768311 100644 --- a/include/linux/platform_data/i2c-mux-reg.h +++ b/include/linux/platform_data/i2c-mux-reg.h @@ -17,7 +17,6 @@ * @n_values: Number of multiplexer channels * @little_endian: Indicating if the register is in little endian * @write_only: Reading the register is not allowed by hardware - * @classes: Optional I2C auto-detection classes * @idle: Value to write to mux when idle * @idle_in_use: indicate if idle value is in use * @reg: Virtual address of the register to switch channel @@ -30,7 +29,6 @@ struct i2c_mux_reg_platform_data { int n_values; bool little_endian; bool write_only; - const unsigned int *classes; u32 idle; bool idle_in_use; void __iomem *reg; diff --git a/include/linux/platform_data/lv5207lp.h b/include/linux/platform_data/lv5207lp.h index c9da8d402750..95d85c1394bc 100644 --- a/include/linux/platform_data/lv5207lp.h +++ b/include/linux/platform_data/lv5207lp.h @@ -8,7 +8,7 @@ struct device; struct lv5207lp_platform_data { - struct device *fbdev; + struct device *dev; unsigned int max_value; unsigned int def_value; }; diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h index 8a5f9f0b2c52..724e1f57b81f 100644 --- a/include/linux/platform_data/mdio-bcm-unimac.h +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -1,11 +1,14 @@ #ifndef __MDIO_BCM_UNIMAC_PDATA_H #define __MDIO_BCM_UNIMAC_PDATA_H +struct clk; + struct unimac_mdio_pdata { u32 phy_mask; int (*wait_func)(void *data); void *wait_func_data; const char *bus_name; + struct clk *clk; }; #define UNIMAC_MDIO_DRV_NAME "unimac-mdio" diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index ea1cc6d829e9..8c659db4da6b 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -20,10 +20,32 @@ #define __MICROCHIP_KSZ_H #include <linux/types.h> +#include <linux/platform_data/dsa.h> + +enum ksz_chip_id { + KSZ8563_CHIP_ID = 0x8563, + KSZ8795_CHIP_ID = 0x8795, + KSZ8794_CHIP_ID = 0x8794, + KSZ8765_CHIP_ID = 0x8765, + KSZ8830_CHIP_ID = 0x8830, + KSZ9477_CHIP_ID = 0x00947700, + KSZ9896_CHIP_ID = 0x00989600, + KSZ9897_CHIP_ID = 0x00989700, + KSZ9893_CHIP_ID = 0x00989300, + KSZ9563_CHIP_ID = 0x00956300, + KSZ8567_CHIP_ID = 0x00856700, + KSZ9567_CHIP_ID = 0x00956700, + LAN9370_CHIP_ID = 0x00937000, + LAN9371_CHIP_ID = 0x00937100, + LAN9372_CHIP_ID = 0x00937200, + LAN9373_CHIP_ID = 0x00937300, + LAN9374_CHIP_ID = 0x00937400, +}; struct ksz_platform_data { + /* Must be first such that dsa_register_switch() can access it */ + struct dsa_chip_data cd; u32 chip_id; - u16 enabled_ports; }; #endif diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h index c510734405bb..89d0ec6f7d46 100644 --- a/include/linux/platform_data/net-cw1200.h +++ b/include/linux/platform_data/net-cw1200.h @@ -14,8 +14,6 @@ struct cw1200_platform_data_spi { /* All others are optional */ bool have_5ghz; - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata, bool enable); /* Control 3v3 / 1v8 supply */ int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata, @@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio { /* All others are optional */ bool have_5ghz; bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */ - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int irq; /* IRQ line or 0 to use SDIO IRQ */ int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata, bool enable); /* Control 3v3 / 1v8 supply */ diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 0dd851ea1c72..7fcb55fe21c9 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -37,9 +37,6 @@ struct omap_tw4030_pdata { bool has_digimic0; bool has_digimic1; u8 has_linein; - - /* Jack detect GPIO or <= 0 if it is not implemented */ - int jack_detect; }; #endif /* _OMAP_TWL4030_H_ */ diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 96c1a14ab365..3c3787c4d96c 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -11,21 +11,8 @@ struct pca953x_platform_data { /* number of the first GPIO */ unsigned gpio_base; - /* initial polarity inversion setting */ - u32 invert; - /* interrupt base */ int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - const char *const *names; }; #endif /* _LINUX_PCA953X_H */ diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h deleted file mode 100644 index 22c53825528f..000000000000 --- a/include/linux/platform_data/rtc-ds2404.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * ds2404.h - platform data structure for the DS2404 RTC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org> - */ - -#ifndef __LINUX_DS2404_H -#define __LINUX_DS2404_H - -struct ds2404_platform_data { - - unsigned int gpio_rst; - unsigned int gpio_clk; - unsigned int gpio_dq; -}; -#endif diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..6c19d4fbbe39 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,7 +10,7 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h> enum shmob_drm_clk_source { SHMOB_DRM_CLK_BUS, @@ -18,72 +18,21 @@ enum shmob_drm_clk_source { SHMOB_DRM_CLK_EXTERNAL, }; -enum shmob_drm_interface { - SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_RGB12B, /* 12bpp */ - SHMOB_DRM_IFACE_RGB16, /* 16bpp */ - SHMOB_DRM_IFACE_RGB18, /* 18bpp */ - SHMOB_DRM_IFACE_RGB24, /* 24bpp */ - SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ -}; - -struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ - struct drm_mode_modeinfo mode; + struct videomode mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - -#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ -#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ -#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ -#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */ -#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */ - struct shmob_drm_interface_data { - enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; + unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */ unsigned int clk_div; - unsigned int flags; }; struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index c71a2dd66143..5f412a615532 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -105,10 +105,12 @@ struct si5351_clkout_config { * @clk_xtal: xtal input clock * @clk_clkin: clkin input clock * @pll_src: array of pll source clock setting + * @pll_reset: array indicating if plls should be reset after setting the rate * @clkout: array of clkout configuration */ struct si5351_platform_data { enum si5351_pll_src pll_src[2]; + bool pll_reset[2]; struct si5351_clkout_config clkout[8]; }; diff --git a/include/linux/platform_data/video-mx3fb.h b/include/linux/platform_data/video-mx3fb.h deleted file mode 100644 index d03dc322a616..000000000000 --- a/include/linux/platform_data/video-mx3fb.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> - */ - -#ifndef __ASM_ARCH_MX3FB_H__ -#define __ASM_ARCH_MX3FB_H__ - -#include <linux/device.h> -#include <linux/fb.h> - -/* Proprietary FB_SYNC_ flags */ -#define FB_SYNC_OE_ACT_HIGH 0x80000000 -#define FB_SYNC_CLK_INVERT 0x40000000 -#define FB_SYNC_DATA_INVERT 0x20000000 -#define FB_SYNC_CLK_IDLE_EN 0x10000000 -#define FB_SYNC_SHARP_MODE 0x08000000 -#define FB_SYNC_SWAP_RGB 0x04000000 -#define FB_SYNC_CLK_SEL_EN 0x02000000 - -/* - * Specify the way your display is connected. The IPU can arbitrarily - * map the internal colors to the external data lines. We only support - * the following mappings at the moment. - */ -enum disp_data_mapping { - /* blue -> d[0..5], green -> d[6..11], red -> d[12..17] */ - IPU_DISP_DATA_MAPPING_RGB666, - /* blue -> d[0..4], green -> d[5..10], red -> d[11..15] */ - IPU_DISP_DATA_MAPPING_RGB565, - /* blue -> d[0..7], green -> d[8..15], red -> d[16..23] */ - IPU_DISP_DATA_MAPPING_RGB888, -}; - -/** - * struct mx3fb_platform_data - mx3fb platform data - * - * @dma_dev: pointer to the dma-device, used for dma-slave connection - * @mode: pointer to a platform-provided per mxc_register_fb() videomode - */ -struct mx3fb_platform_data { - struct device *dma_dev; - const char *name; - const struct fb_videomode *mode; - int num_modes; - enum disp_data_mapping disp_data_fmt; -}; - -#endif diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 28234dc9fa6a..ab1c7deff118 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -58,6 +58,10 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +/* This can only be used to disable the screen, not re-enable */ +#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 +/* Writing a brightness re-enables the screen if disabled */ +#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 @@ -66,6 +70,7 @@ #define ASUS_WMI_DEVID_CAMERA 0x00060013 #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 #define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077 +#define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 @@ -80,8 +85,19 @@ #define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */ #define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013 #define ASUS_WMI_DEVID_GPU_FAN_CTRL 0x00110014 +#define ASUS_WMI_DEVID_MID_FAN_CTRL 0x00110031 #define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024 #define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025 +#define ASUS_WMI_DEVID_MID_FAN_CURVE 0x00110032 + +/* Tunables for AUS ROG laptops */ +#define ASUS_WMI_DEVID_PPT_PL2_SPPT 0x001200A0 +#define ASUS_WMI_DEVID_PPT_PL1_SPL 0x001200A3 +#define ASUS_WMI_DEVID_PPT_APU_SPPT 0x001200B0 +#define ASUS_WMI_DEVID_PPT_PLAT_SPPT 0x001200B1 +#define ASUS_WMI_DEVID_PPT_FPPT 0x001200C1 +#define ASUS_WMI_DEVID_NV_DYN_BOOST 0x001200C0 +#define ASUS_WMI_DEVID_NV_THERM_TARGET 0x001200C2 /* Power */ #define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012 @@ -95,7 +111,15 @@ /* Keyboard dock */ #define ASUS_WMI_DEVID_KBD_DOCK 0x00120063 -/* dgpu on/off */ +/* Charging mode - 1=Barrel, 2=USB */ +#define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C + +/* MCU powersave mode */ +#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 + +/* epu is connected? 1 == true */ +#define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 +/* egpu on/off */ #define ASUS_WMI_DEVID_EGPU 0x00090019 /* dgpu on/off */ diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h index 41df326583f9..7f132029316a 100644 --- a/include/linux/platform_data/x86/clk-lpss.h +++ b/include/linux/platform_data/x86/clk-lpss.h @@ -15,6 +15,6 @@ struct lpss_clk_data { struct clk *clk; }; -extern int lpss_atom_clk_init(void); +int lpss_atom_clk_init(void); #endif /* __CLK_LPSS_H */ diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index b8a701c77fd0..161e4bc1c9ee 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -43,6 +43,19 @@ BIT_ORED_DEDICATED_IRQ_GPSC | \ BIT_SHARED_IRQ_GPSS) +/* External clk generator settings */ +#define PMC_CLK_CTL_OFFSET 0x60 +#define PMC_CLK_CTL_SIZE 4 +#define PMC_CLK_NUM 6 +#define PMC_CLK_CTL_GATED_ON_D3 0x0 +#define PMC_CLK_CTL_FORCE_ON 0x1 +#define PMC_CLK_CTL_FORCE_OFF 0x2 +#define PMC_CLK_CTL_RESERVED 0x3 +#define PMC_MASK_CLK_CTL GENMASK(1, 0) +#define PMC_MASK_CLK_FREQ BIT(2) +#define PMC_CLK_FREQ_XTAL (0 << 2) /* 25 MHz */ +#define PMC_CLK_FREQ_PLL (1 << 2) /* 19.2 MHz */ + /* The timers accumulate time spent in sleep state */ #define PMC_S0IR_TMR 0x80 #define PMC_S0I1_TMR 0x84 @@ -104,14 +117,14 @@ #define BIT_SCC_SDIO BIT(9) #define BIT_SCC_SDCARD BIT(10) #define BIT_SCC_MIPI BIT(11) -#define BIT_HDA BIT(12) +#define BIT_HDA BIT(12) /* CHT datasheet: reserved */ #define BIT_LPE BIT(13) #define BIT_OTG BIT(14) -#define BIT_USH BIT(15) -#define BIT_GBE BIT(16) -#define BIT_SATA BIT(17) -#define BIT_USB_EHCI BIT(18) -#define BIT_SEC BIT(19) +#define BIT_USH BIT(15) /* CHT datasheet: reserved */ +#define BIT_GBE BIT(16) /* CHT datasheet: reserved */ +#define BIT_SATA BIT(17) /* CHT datasheet: reserved */ +#define BIT_USB_EHCI BIT(18) /* CHT datasheet: XHCI! */ +#define BIT_SEC BIT(19) /* BYT datasheet: reserved */ #define BIT_PCIE_PORT0 BIT(20) #define BIT_PCIE_PORT1 BIT(21) #define BIT_PCIE_PORT2 BIT(22) diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h index c852fe24fe2a..752c06b47cc8 100644 --- a/include/linux/platform_data/x86/pwm-lpss.h +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -27,7 +27,7 @@ struct pwm_lpss_boardinfo { bool other_devices_aml_touches_pwm_regs; }; -struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, - const struct pwm_lpss_boardinfo *info); +struct pwm_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, + const struct pwm_lpss_boardinfo *info); #endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */ diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h index 57d6a10dfc9e..2d7f7120ba6b 100644 --- a/include/linux/platform_data/x86/simatic-ipc-base.h +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -2,7 +2,7 @@ /* * Siemens SIMATIC IPC drivers * - * Copyright (c) Siemens AG, 2018-2021 + * Copyright (c) Siemens AG, 2018-2023 * * Authors: * Henning Schild <henning.schild@siemens.com> @@ -20,6 +20,9 @@ #define SIMATIC_IPC_DEVICE_127E 3 #define SIMATIC_IPC_DEVICE_227E 4 #define SIMATIC_IPC_DEVICE_227G 5 +#define SIMATIC_IPC_DEVICE_BX_21A 6 +#define SIMATIC_IPC_DEVICE_BX_39A 7 +#define SIMATIC_IPC_DEVICE_BX_59A 8 struct simatic_ipc_platform { u8 devmode; diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index a48bb5240977..8d8b3b919674 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -2,7 +2,7 @@ /* * Siemens SIMATIC IPC drivers * - * Copyright (c) Siemens AG, 2018-2021 + * Copyright (c) Siemens AG, 2018-2023 * * Authors: * Henning Schild <henning.schild@siemens.com> @@ -32,8 +32,12 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, + SIMATIC_IPC_IPC277G = 0x00000F02, SIMATIC_IPC_IPCBX_39A = 0x00001001, SIMATIC_IPC_IPCPX_39A = 0x00001002, + SIMATIC_IPC_IPCBX_21A = 0x00001101, + SIMATIC_IPC_IPCBX_56A = 0x00001201, + SIMATIC_IPC_IPCBX_59A = 0x00001202, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index b845fd83f429..7a41c72c1959 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -63,6 +63,8 @@ extern struct resource *platform_get_mem_or_io(struct platform_device *, extern struct device * platform_find_device_by_driver(struct device *start, const struct device_driver *drv); + +#ifdef CONFIG_HAS_IOMEM extern void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, unsigned int index, struct resource **res); @@ -72,6 +74,32 @@ devm_platform_ioremap_resource(struct platform_device *pdev, extern void __iomem * devm_platform_ioremap_resource_byname(struct platform_device *pdev, const char *name); +#else + +static inline void __iomem * +devm_platform_get_and_ioremap_resource(struct platform_device *pdev, + unsigned int index, struct resource **res) +{ + return ERR_PTR(-EINVAL); +} + + +static inline void __iomem * +devm_platform_ioremap_resource(struct platform_device *pdev, + unsigned int index) +{ + return ERR_PTR(-EINVAL); +} + +static inline void __iomem * +devm_platform_ioremap_resource_byname(struct platform_device *pdev, + const char *name) +{ + return ERR_PTR(-EINVAL); +} + +#endif + extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); diff --git a/include/linux/plist.h b/include/linux/plist.h index 0f352c1d3c80..8c1c8adf7fe9 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -75,20 +75,10 @@ #include <linux/container_of.h> #include <linux/list.h> -#include <linux/types.h> +#include <linux/plist_types.h> #include <asm/bug.h> -struct plist_head { - struct list_head node_list; -}; - -struct plist_node { - int prio; - struct list_head prio_list; - struct list_head node_list; -}; - /** * PLIST_HEAD_INIT - static struct plist_head initializer * @head: struct plist_head variable name diff --git a/include/linux/plist_types.h b/include/linux/plist_types.h new file mode 100644 index 000000000000..c37e784330af --- /dev/null +++ b/include/linux/plist_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_PLIST_TYPES_H +#define _LINUX_PLIST_TYPES_H + +#include <linux/types.h> + +struct plist_head { + struct list_head node_list; +}; + +struct plist_node { + int prio; + struct list_head prio_list; + struct list_head node_list; +}; + +#endif /* _LINUX_PLIST_TYPES_H */ diff --git a/include/linux/pm.h b/include/linux/pm.h index badad7d11f4f..97b0e23363c8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ +#define _EXPORT_PM_OPS(name, license, ns) \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, license, ns); \ const struct dev_pm_ops name -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) -#else -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ + +#define _DISCARD_PM_OPS(name, license, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#else +#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) #define EXPORT_PM_FN_NS_GPL(name, ns) #endif -#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") -#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) -#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) +#ifdef CONFIG_PM_SLEEP +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#else +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) +#endif + +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) + +#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns) /* * Use this if you want to use the same suspend and resume callbacks for suspend @@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_DEV_PM_OPS(name) = { \ + EXPORT_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_GPL_DEV_PM_OPS(name) = { \ + EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } @@ -448,6 +463,15 @@ const struct dev_pm_ops __maybe_unused name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } +/* + * Use this if you want to have the suspend and resume callbacks be called + * with IRQs disabled. + */ +#define DEFINE_NOIRQ_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops name = { \ + NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + #define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) #define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr)) @@ -638,8 +662,8 @@ struct pm_subsys_data { struct dev_pm_info { pm_message_t power_state; - unsigned int can_wakeup:1; - unsigned int async_suspend:1; + bool can_wakeup:1; + bool async_suspend:1; bool in_dpm_list:1; /* Owned by the PM core */ bool is_prepared:1; /* Owned by the PM core */ bool is_suspended:1; /* Ditto */ @@ -657,10 +681,11 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - unsigned int must_resume:1; /* Owned by the PM core */ - unsigned int may_skip_resume:1; /* Set by subsystems */ + bool async_in_progress:1; /* Owned by the PM core */ + bool must_resume:1; /* Owned by the PM core */ + bool may_skip_resume:1; /* Set by subsystems */ #else - unsigned int should_wakeup:1; + bool should_wakeup:1; #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; @@ -671,17 +696,17 @@ struct dev_pm_info { atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; - unsigned int idle_notification:1; - unsigned int request_pending:1; - unsigned int deferred_resume:1; - unsigned int needs_force_resume:1; - unsigned int runtime_auto:1; + bool idle_notification:1; + bool request_pending:1; + bool deferred_resume:1; + bool needs_force_resume:1; + bool runtime_auto:1; bool ignore_children:1; - unsigned int no_callbacks:1; - unsigned int irq_safe:1; - unsigned int use_autosuspend:1; - unsigned int timer_autosuspends:1; - unsigned int memalloc_noio:1; + bool no_callbacks:1; + bool irq_safe:1; + bool use_autosuspend:1; + bool timer_autosuspends:1; + bool memalloc_noio:1; unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; @@ -710,6 +735,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * @set_performance_state: Called to request a new performance state. * * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions instead of @@ -722,6 +748,7 @@ struct dev_pm_domain { int (*activate)(struct device *dev); void (*sync)(struct device *dev); void (*dismiss)(struct device *dev); + int (*set_performance_state)(struct device *dev, unsigned int state); }; /* diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index ada3a0ab10bf..68669ce18720 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -91,10 +91,10 @@ static inline int devm_pm_clk_create(struct device *dev) #endif #ifdef CONFIG_HAVE_CLK -extern void pm_clk_add_notifier(struct bus_type *bus, +extern void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb); #else -static inline void pm_clk_add_notifier(struct bus_type *bus, +static inline void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb) { } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..772d3280d35f 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -20,6 +20,33 @@ #include <linux/time64.h> /* + * Flags to control the behaviour when attaching a device to its PM domains. + * + * PD_FLAG_NO_DEV_LINK: As the default behaviour creates a device-link + * for every PM domain that gets attached, this + * flag can be used to skip that. + * + * PD_FLAG_DEV_LINK_ON: Add the DL_FLAG_RPM_ACTIVE to power-on the + * supplier and its PM domain when creating the + * device-links. + * + */ +#define PD_FLAG_NO_DEV_LINK BIT(0) +#define PD_FLAG_DEV_LINK_ON BIT(1) + +struct dev_pm_domain_attach_data { + const char * const *pd_names; + const u32 num_pd_names; + const u32 pd_flags; +}; + +struct dev_pm_domain_list { + struct device **pd_devs; + struct device_link **pd_links; + u32 num_pds; +}; + +/* * Flags to control the behaviour of a genpd. * * These flags may be set in the struct generic_pm_domain's flags field by a @@ -61,6 +88,10 @@ * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its * components' next wakeup when determining the * optimal idle state. + * + * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, + * but its corresponding OPP tables are not + * described in DT, but are given directly by FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -69,6 +100,7 @@ #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) +#define GENPD_FLAG_OPP_TABLE_FW (1U << 7) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -113,7 +145,6 @@ struct genpd_power_state { }; struct genpd_lock_ops; -struct dev_pm_opp; struct opp_table; struct generic_pm_domain { @@ -141,8 +172,6 @@ struct generic_pm_domain { int (*power_on)(struct generic_pm_domain *domain); struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ struct opp_table *opp_table; /* OPP table of the genpd */ - unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd, - struct dev_pm_opp *opp); int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); struct gpd_dev_ops dev_ops; @@ -320,7 +349,7 @@ static inline void dev_pm_genpd_resume(struct device *dev) {} /* OF PM domain providers */ struct of_device_id; -typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, +typedef struct generic_pm_domain *(*genpd_xlate_t)(const struct of_phandle_args *args, void *data); struct genpd_onecell_data { @@ -335,16 +364,14 @@ int of_genpd_add_provider_simple(struct device_node *np, int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); -int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); -int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); +int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev); +int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); +int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); -unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, - struct dev_pm_opp *opp); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -366,20 +393,20 @@ static inline int of_genpd_add_provider_onecell(struct device_node *np, static inline void of_genpd_del_provider(struct device_node *np) {} -static inline int of_genpd_add_device(struct of_phandle_args *args, +static inline int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev) { return -ENODEV; } -static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } -static inline int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } @@ -390,13 +417,6 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, return -ENODEV; } -static inline unsigned int -pm_genpd_opp_to_performance_state(struct device *genpd_dev, - struct dev_pm_opp *opp) -{ - return 0; -} - static inline int genpd_dev_pm_attach(struct device *dev) { return 0; @@ -427,9 +447,14 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index); struct device *dev_pm_domain_attach_by_name(struct device *dev, const char *name); +int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); +void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { @@ -445,13 +470,25 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev, { return NULL; } +static inline int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} +static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) { return 0; } static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} +static inline int dev_pm_domain_set_performance_state(struct device *dev, + unsigned int state) +{ + return 0; +} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dc1fb5890792..065a47382302 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -16,6 +16,7 @@ #include <linux/notifier.h> struct clk; +struct cpufreq_frequency_table; struct regulator; struct dev_pm_opp; struct device; @@ -45,18 +46,6 @@ struct dev_pm_opp_supply { unsigned long u_watt; }; -/** - * struct dev_pm_opp_icc_bw - Interconnect bandwidth values - * @avg: Average bandwidth corresponding to this OPP (in icc units) - * @peak: Peak bandwidth corresponding to this OPP (in icc units) - * - * This structure stores the bandwidth values for a single interconnect path. - */ -struct dev_pm_opp_icc_bw { - u32 avg; - u32 peak; -}; - typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, struct regulator **regulators, unsigned int count); @@ -74,8 +63,10 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. - * @virt_devs: Pointer to return the array of virtual devices. + * attach. Mutually exclusive with required_devs. + * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually + * exclusive with required_devs. + * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. * * This structure contains platform specific OPP configurations for the device. */ @@ -90,6 +81,24 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; + struct device **required_devs; +}; + +#define OPP_LEVEL_UNSET U32_MAX + +/** + * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @turbo: Flag to indicate whether the OPP is to be marked turbo or not. + * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if + * level field isn't used. + * @freq: The clock rate in Hz for the OPP. + * @u_volt: The voltage in uV for the OPP. + */ +struct dev_pm_opp_data { + bool turbo; + unsigned int level; + unsigned long freq; + unsigned long u_volt; }; #if defined(CONFIG_PM_OPP) @@ -103,7 +112,7 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); -unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index); unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp); @@ -121,16 +130,31 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); + +struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available); + struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev, + unsigned long *freq, u32 index); + +struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, + unsigned long *freq); + +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev, + unsigned long *freq, u32 index); + struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level); + struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); -struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, - unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned int *level); struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -140,8 +164,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, void dev_pm_opp_put(struct dev_pm_opp *opp); -int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt); +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); + void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); @@ -200,7 +224,7 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) return 0; } -static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) +static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index) { return 0; } @@ -247,26 +271,27 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) return 0; } -static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, - unsigned int level) +static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, - unsigned int *level) +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, + u32 index, bool available) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, - unsigned long freq, bool available) +static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, + unsigned long *freq) { return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, - unsigned long *freq) +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index) { return ERR_PTR(-EOPNOTSUPP); } @@ -277,6 +302,30 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp * +dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, + unsigned int level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned int *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { @@ -291,8 +340,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt) +static inline int +dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp) { return -EOPNOTSUPP; } @@ -398,6 +447,21 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) #endif /* CONFIG_PM_OPP */ +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +#else +static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ +} +#endif + + #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); @@ -488,6 +552,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta /* OPP Configuration helpers */ +static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + struct dev_pm_opp_data data = { + .freq = freq, + .u_volt = u_volt, + }; + + return dev_pm_opp_add_dynamic(dev, &data); +} + /* Regulators helpers */ static inline int dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) @@ -631,4 +706,9 @@ static inline void dev_pm_opp_put_prop_name(int token) dev_pm_opp_clear_config(token); } +static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) +{ + return dev_pm_opp_get_freq_indexed(opp, 0); +} + #endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9a8151a2bdea..d39dc863f612 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -72,7 +72,8 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); -extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); +extern int pm_runtime_get_if_active(struct device *dev); +extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -85,8 +86,6 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); -extern void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); @@ -97,18 +96,6 @@ extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); /** - * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. - * @dev: Target device. - * - * Increment the runtime PM usage counter of @dev if its runtime PM status is - * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. - */ -static inline int pm_runtime_get_if_in_use(struct device *dev) -{ - return pm_runtime_get_if_active(dev, false); -} - -/** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. @@ -277,8 +264,7 @@ static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } -static inline int pm_runtime_get_if_active(struct device *dev, - bool ign_usage_count) +static inline int pm_runtime_get_if_active(struct device *dev) { return -EINVAL; } @@ -463,6 +449,18 @@ static inline int pm_runtime_put(struct device *dev) } /** + * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. + * @dev: Target device. + * + * Decrement the runtime PM usage counter of @dev and if it turns out to be + * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + */ +static inline int __pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); +} + +/** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 77f4849e3418..6eb9adaef52b 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, #endif /* !CONFIG_PM_SLEEP */ +static inline bool device_awake_path(struct device *dev) +{ + return device_wakeup_path(dev); +} + +static inline void device_set_awake_path(struct device *dev) +{ + device_set_wakeup_path(dev); +} + static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index c2a7cfbca713..ddbe7c3ca4ce 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) struct pnp_fixup { char id[7]; - void (*quirk_function) (struct pnp_dev * dev); /* fixup function */ + void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; /* config parameters */ @@ -419,8 +419,8 @@ struct pnp_protocol { /* protocol specific suspend/resume */ bool (*can_wakeup) (struct pnp_dev *dev); - int (*suspend) (struct pnp_dev * dev, pm_message_t state); - int (*resume) (struct pnp_dev * dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number */ @@ -435,7 +435,7 @@ struct pnp_protocol { #define protocol_for_each_dev(protocol, dev) \ list_for_each_entry(dev, &(protocol)->devices, protocol_list) -extern struct bus_type pnp_bus_type; +extern const struct bus_type pnp_bus_type; #if defined(CONFIG_PNP) @@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } diff --git a/include/linux/poison.h b/include/linux/poison.h index 851a855d3868..1f0ee2459f2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -83,6 +83,8 @@ /********** net/core/skbuff.c **********/ #define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA)) +/********** net/ **********/ +#define NET_PTR_POISON ((void *)(0x801 + POISON_POINTER_DELTA)) /********** kernel/bpf/ **********/ #define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) @@ -90,4 +92,7 @@ /********** VFS **********/ #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) +/********** lib/stackdepot.c **********/ +#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include <linux/rwsem.h> struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -91,6 +92,24 @@ struct posix_clock { }; /** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + +/** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. Caller must provide 'ops' field * @dev: Pointer to the initialized device. Caller must provide diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d607f51404fc..dc7b738de299 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -2,40 +2,16 @@ #ifndef _linux_POSIX_TIMERS_H #define _linux_POSIX_TIMERS_H -#include <linux/spinlock.h> +#include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> -#include <linux/alarmtimer.h> +#include <linux/posix-timers_types.h> +#include <linux/spinlock.h> #include <linux/timerqueue.h> struct kernel_siginfo; struct task_struct; -/* - * Bit fields within a clockid: - * - * The most significant 29 bits hold either a pid or a file descriptor. - * - * Bit 2 indicates whether a cpu clock refers to a thread or a process. - * - * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. - * - * A clockid is invalid if bits 2, 1, and 0 are all set. - */ -#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) -#define CPUCLOCK_PERTHREAD(clock) \ - (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) - -#define CPUCLOCK_PERTHREAD_MASK 4 -#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) -#define CPUCLOCK_CLOCK_MASK 3 -#define CPUCLOCK_PROF 0 -#define CPUCLOCK_VIRT 1 -#define CPUCLOCK_SCHED 2 -#define CPUCLOCK_MAX 3 -#define CLOCKFD CPUCLOCK_MAX -#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) - static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) { @@ -109,44 +85,6 @@ static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) ctmr->node.expires = exp; } -/** - * posix_cputimer_base - Container per posix CPU clock - * @nextevt: Earliest-expiration cache - * @tqhead: timerqueue head for cpu_timers - */ -struct posix_cputimer_base { - u64 nextevt; - struct timerqueue_head tqhead; -}; - -/** - * posix_cputimers - Container for posix CPU timer related data - * @bases: Base container for posix CPU clocks - * @timers_active: Timers are queued. - * @expiry_active: Timer expiry is active. Used for - * process wide timers to avoid multiple - * task trying to handle expiry concurrently - * - * Used in task_struct and signal_struct - */ -struct posix_cputimers { - struct posix_cputimer_base bases[CPUCLOCK_MAX]; - unsigned int timers_active; - unsigned int expiry_active; -}; - -/** - * posix_cputimers_work - Container for task work based posix CPU timer expiry - * @work: The task work to be scheduled - * @mutex: Mutex held around expiry in context of this task work - * @scheduled: @work has been scheduled already, no further processing - */ -struct posix_cputimers_work { - struct callback_head work; - struct mutex mutex; - unsigned int scheduled; -}; - static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); @@ -179,7 +117,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ }, #else -struct posix_cputimers { }; struct cpu_timer { }; #define INIT_CPU_TIMERS(s) static inline void posix_cputimers_init(struct posix_cputimers *pct) { } diff --git a/include/linux/posix-timers_types.h b/include/linux/posix-timers_types.h new file mode 100644 index 000000000000..a4712c1008c9 --- /dev/null +++ b/include/linux/posix-timers_types.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _linux_POSIX_TIMERS_TYPES_H +#define _linux_POSIX_TIMERS_TYPES_H + +#include <linux/mutex_types.h> +#include <linux/timerqueue_types.h> +#include <linux/types.h> + +/* + * Bit fields within a clockid: + * + * The most significant 29 bits hold either a pid or a file descriptor. + * + * Bit 2 indicates whether a cpu clock refers to a thread or a process. + * + * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. + * + * A clockid is invalid if bits 2, 1, and 0 are all set. + */ +#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) +#define CPUCLOCK_PERTHREAD(clock) \ + (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) + +#define CPUCLOCK_PERTHREAD_MASK 4 +#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) +#define CPUCLOCK_CLOCK_MASK 3 +#define CPUCLOCK_PROF 0 +#define CPUCLOCK_VIRT 1 +#define CPUCLOCK_SCHED 2 +#define CPUCLOCK_MAX 3 +#define CLOCKFD CPUCLOCK_MAX +#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) + +#ifdef CONFIG_POSIX_TIMERS + +/** + * posix_cputimer_base - Container per posix CPU clock + * @nextevt: Earliest-expiration cache + * @tqhead: timerqueue head for cpu_timers + */ +struct posix_cputimer_base { + u64 nextevt; + struct timerqueue_head tqhead; +}; + +/** + * posix_cputimers - Container for posix CPU timer related data + * @bases: Base container for posix CPU clocks + * @timers_active: Timers are queued. + * @expiry_active: Timer expiry is active. Used for + * process wide timers to avoid multiple + * task trying to handle expiry concurrently + * + * Used in task_struct and signal_struct + */ +struct posix_cputimers { + struct posix_cputimer_base bases[CPUCLOCK_MAX]; + unsigned int timers_active; + unsigned int expiry_active; +}; + +/** + * posix_cputimers_work - Container for task work based posix CPU timer expiry + * @work: The task work to be scheduled + * @mutex: Mutex held around expiry in context of this task work + * @scheduled: @work has been scheduled already, no further processing + */ +struct posix_cputimers_work { + struct callback_head work; + struct mutex mutex; + unsigned int scheduled; +}; + +#else /* CONFIG_POSIX_TIMERS */ + +struct posix_cputimers { }; + +#endif /* CONFIG_POSIX_TIMERS */ + +#endif /* _linux_POSIX_TIMERS_TYPES_H */ diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 7c8d65414a70..b9e5bd2b42d3 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -61,7 +61,6 @@ struct bq27xxx_reg_cache { struct bq27xxx_device_info { struct device *dev; - int id; enum bq27xxx_chip chip; u32 opts; const char *name; @@ -83,5 +82,6 @@ struct bq27xxx_device_info { void bq27xxx_battery_update(struct bq27xxx_device_info *di); int bq27xxx_battery_setup(struct bq27xxx_device_info *di); void bq27xxx_battery_teardown(struct bq27xxx_device_info *di); +extern const struct dev_pm_ops bq27xxx_battery_battery_pm_ops; #endif diff --git a/include/linux/power/power_on_reason.h b/include/linux/power/power_on_reason.h new file mode 100644 index 000000000000..95a1ec0c403c --- /dev/null +++ b/include/linux/power/power_on_reason.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Author: Kamel Bouhra <kamel.bouhara@bootlin.com> + */ + +#ifndef POWER_ON_REASON_H +#define POWER_ON_REASON_H + +#define POWER_ON_REASON_REGULAR "regular power-up" +#define POWER_ON_REASON_RTC "RTC wakeup" +#define POWER_ON_REASON_WATCHDOG "watchdog timeout" +#define POWER_ON_REASON_SOFTWARE "software reset" +#define POWER_ON_REASON_RST_BTN "reset button action" +#define POWER_ON_REASON_CPU_CLK_FAIL "CPU clock failure" +#define POWER_ON_REASON_XTAL_FAIL "crystal oscillator failure" +#define POWER_ON_REASON_BROWN_OUT "brown-out reset" +#define POWER_ON_REASON_UNKNOWN "unknown reason" + +#endif /* POWER_ON_REASON_H */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a427f13c757f..8e5705a56b85 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -242,6 +242,7 @@ struct power_supply_config { struct power_supply_desc { const char *name; enum power_supply_type type; + u8 charge_behaviours; const enum power_supply_usb_type *usb_types; size_t num_usb_types; const enum power_supply_property *properties; @@ -767,7 +768,6 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) @@ -895,8 +895,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -/* For APM emulation, think legacy userspace. */ -extern struct class *power_supply_class; +extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { diff --git a/include/linux/prandom.h b/include/linux/prandom.h index f2ed5b72b3d6..f7f1e5251c67 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/once.h> -#include <linux/percpu.h> #include <linux/random.h> struct rnd_state { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..7233e9cf1bab 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -9,7 +9,7 @@ #include <linux/linkage.h> #include <linux/cleanup.h> -#include <linux/list.h> +#include <linux/types.h> /* * We put the hardirq and softirq counter into the preemption @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). + */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: @@ -349,7 +360,9 @@ void preempt_notifier_unregister(struct preempt_notifier *notifier); static inline void preempt_notifier_init(struct preempt_notifier *notifier, struct preempt_ops *ops) { - INIT_HLIST_NODE(¬ifier->link); + /* INIT_HLIST_NODE() open coded, to avoid dependency on list.h */ + notifier->link.next = NULL; + notifier->link.pprev = NULL; notifier->ops = ops; } diff --git a/include/linux/printk.h b/include/linux/printk.h index 8ef499ab3c1e..955e31860095 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void) } #endif +bool this_cpu_in_panic(void); + #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 253f2676d93a..0b2a89854440 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) @@ -159,6 +160,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, #endif /* CONFIG_PROC_PID_ARCH_STATUS */ void arch_report_meminfo(struct seq_file *m); +void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task); #else /* CONFIG_PROC_FS */ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 49539bc416ce..5ea470eb4d76 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {} static inline int ns_alloc_inum(struct ns_common *ns) { - atomic_long_set(&ns->stashed, 0); + WRITE_ONCE(ns->stashed, NULL); return proc_alloc_inum(&ns->inum); } diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e2..04ae5ebcb637 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..3a1045eb786c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -10,6 +10,8 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include <linux/args.h> +#include <linux/array_size.h> #include <linux/bits.h> #include <linux/fwnode.h> #include <linux/stddef.h> @@ -26,12 +28,6 @@ enum dev_prop_type { DEV_PROP_REF, }; -enum dev_dma_attr { - DEV_DMA_NOT_SUPPORTED, - DEV_DMA_NON_COHERENT, - DEV_DMA_COHERENT, -}; - const struct fwnode_handle *__dev_fwnode_const(const struct device *dev); struct fwnode_handle *__dev_fwnode(struct device *dev); #define dev_fwnode(dev) \ @@ -79,6 +75,16 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, bool fwnode_device_is_available(const struct fwnode_handle *fwnode); +static inline bool fwnode_device_is_big_endian(const struct fwnode_handle *fwnode) +{ + if (fwnode_property_present(fwnode, "big-endian")) + return true; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && + fwnode_property_present(fwnode, "native-endian")) + return true; + return false; +} + static inline bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat) { @@ -86,6 +92,22 @@ bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char } /** + * device_is_big_endian - check if a device has BE registers + * @dev: Pointer to the struct device + * + * Returns: true if the device has a "big-endian" property, or if the kernel + * was compiled for BE *and* the device has a "native-endian" property. + * Returns false otherwise. + * + * Callers would nominally use ioread32be/iowrite32be if + * device_is_big_endian() == true, or readl/writel otherwise. + */ +static inline bool device_is_big_endian(const struct device *dev) +{ + return fwnode_device_is_big_endian(dev_fwnode(dev)); +} + +/** * device_is_compatible - match 'compatible' property of the device with a given string * @dev: Pointer to the struct device * @compat: The string to match 'compatible' property with @@ -97,6 +119,18 @@ static inline bool device_is_compatible(const struct device *dev, const char *co return fwnode_device_is_compatible(dev_fwnode(dev), compat); } +int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, + const char *propname, + const char * const *array, size_t n); + +static inline +int device_property_match_property_string(const struct device *dev, + const char *propname, + const char * const *array, size_t n) +{ + return fwnode_property_match_property_string(dev_fwnode(dev), propname, array, n); +} + int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, @@ -108,6 +142,7 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *fwnode_get_name(const struct fwnode_handle *fwnode); const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode); +bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); @@ -116,11 +151,9 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); for (parent = fwnode_get_parent(fwnode); parent; \ parent = fwnode_get_next_parent(parent)) -struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode); unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, unsigned int depth); -bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( @@ -288,7 +321,7 @@ struct software_node_ref_args { #define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ (const struct software_node_ref_args) { \ .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .nargs = COUNT_ARGS(__VA_ARGS__), \ .args = { __VA_ARGS__ }, \ } @@ -488,6 +521,13 @@ struct software_node { const struct property_entry *properties; }; +#define SOFTWARE_NODE(_name_, _properties_, _parent_) \ + (struct software_node) { \ + .name = _name_, \ + .properties = _properties_, \ + .parent = _parent_, \ + } + bool is_software_node(const struct fwnode_handle *fwnode); const struct software_node * to_software_node(const struct fwnode_handle *fwnode); diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index eceda1d1407a..730f77381d55 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,7 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; - const struct xattr_handler **xattr; + const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; }; diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 75da8f5f7ad8..c1dc87fc536b 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -8,6 +8,10 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, PSP_I2C_REQ_BUS_CMD = 0x64, + PSP_DYNAMIC_BOOST_GET_NONCE, + PSP_DYNAMIC_BOOST_SET_UID, + PSP_DYNAMIC_BOOST_GET_PARAMETER, + PSP_DYNAMIC_BOOST_SET_PARAMETER, }; struct psp_req_buffer_hdr { diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fd17e82bab4..3705c2044fc0 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -78,6 +78,36 @@ enum sev_cmd { SEV_CMD_DBG_DECRYPT = 0x060, SEV_CMD_DBG_ENCRYPT = 0x061, + /* SNP specific commands */ + SEV_CMD_SNP_INIT = 0x081, + SEV_CMD_SNP_SHUTDOWN = 0x082, + SEV_CMD_SNP_PLATFORM_STATUS = 0x083, + SEV_CMD_SNP_DF_FLUSH = 0x084, + SEV_CMD_SNP_INIT_EX = 0x085, + SEV_CMD_SNP_SHUTDOWN_EX = 0x086, + SEV_CMD_SNP_DECOMMISSION = 0x090, + SEV_CMD_SNP_ACTIVATE = 0x091, + SEV_CMD_SNP_GUEST_STATUS = 0x092, + SEV_CMD_SNP_GCTX_CREATE = 0x093, + SEV_CMD_SNP_GUEST_REQUEST = 0x094, + SEV_CMD_SNP_ACTIVATE_EX = 0x095, + SEV_CMD_SNP_LAUNCH_START = 0x0A0, + SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1, + SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2, + SEV_CMD_SNP_DBG_DECRYPT = 0x0B0, + SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1, + SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0, + SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1, + SEV_CMD_SNP_PAGE_MOVE = 0x0C2, + SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3, + SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6, + SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7, + SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8, + SEV_CMD_SNP_CONFIG = 0x0C9, + SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, + SEV_CMD_SNP_COMMIT = 0x0CB, + SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_MAX, }; @@ -523,12 +553,269 @@ struct sev_data_attestation_report { u32 len; /* In/Out */ } __packed; +/** + * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params + * + * @address: physical address of firmware image + * @len: length of the firmware image + */ +struct sev_data_snp_download_firmware { + u64 address; /* In */ + u32 len; /* In */ +} __packed; + +/** + * struct sev_data_snp_activate - SNP_ACTIVATE command params + * + * @gctx_paddr: system physical address guest context page + * @asid: ASID to bind to the guest + */ +struct sev_data_snp_activate { + u64 gctx_paddr; /* In */ + u32 asid; /* In */ +} __packed; + +/** + * struct sev_data_snp_addr - generic SNP command params + * + * @address: physical address of generic data param + */ +struct sev_data_snp_addr { + u64 address; /* In/Out */ +} __packed; + +/** + * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params + * + * @gctx_paddr: system physical address of guest context page + * @policy: guest policy + * @ma_gctx_paddr: system physical address of migration agent + * @ma_en: the guest is associated with a migration agent + * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the + * purpose of guest-assisted migration. + * @rsvd: reserved + * @gosvw: guest OS-visible workarounds, as defined by hypervisor + */ +struct sev_data_snp_launch_start { + u64 gctx_paddr; /* In */ + u64 policy; /* In */ + u64 ma_gctx_paddr; /* In */ + u32 ma_en:1; /* In */ + u32 imi_en:1; /* In */ + u32 rsvd:30; + u8 gosvw[16]; /* In */ +} __packed; + +/* SNP support page type */ +enum { + SNP_PAGE_TYPE_NORMAL = 0x1, + SNP_PAGE_TYPE_VMSA = 0x2, + SNP_PAGE_TYPE_ZERO = 0x3, + SNP_PAGE_TYPE_UNMEASURED = 0x4, + SNP_PAGE_TYPE_SECRET = 0x5, + SNP_PAGE_TYPE_CPUID = 0x6, + + SNP_PAGE_TYPE_MAX +}; + +/** + * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params + * + * @gctx_paddr: system physical address of guest context page + * @page_size: page size 0 indicates 4K and 1 indicates 2MB page + * @page_type: encoded page type + * @imi_page: indicates that this page is part of the IMI (Incoming Migration + * Image) of the guest + * @rsvd: reserved + * @rsvd2: reserved + * @address: system physical address of destination page to encrypt + * @rsvd3: reserved + * @vmpl1_perms: VMPL permission mask for VMPL1 + * @vmpl2_perms: VMPL permission mask for VMPL2 + * @vmpl3_perms: VMPL permission mask for VMPL3 + * @rsvd4: reserved + */ +struct sev_data_snp_launch_update { + u64 gctx_paddr; /* In */ + u32 page_size:1; /* In */ + u32 page_type:3; /* In */ + u32 imi_page:1; /* In */ + u32 rsvd:27; + u32 rsvd2; + u64 address; /* In */ + u32 rsvd3:8; + u32 vmpl1_perms:8; /* In */ + u32 vmpl2_perms:8; /* In */ + u32 vmpl3_perms:8; /* In */ + u32 rsvd4; +} __packed; + +/** + * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params + * + * @gctx_paddr: system physical address of guest context page + * @id_block_paddr: system physical address of ID block + * @id_auth_paddr: system physical address of ID block authentication structure + * @id_block_en: indicates whether ID block is present + * @auth_key_en: indicates whether author key is present in authentication structure + * @rsvd: reserved + * @host_data: host-supplied data for guest, not interpreted by firmware + */ +struct sev_data_snp_launch_finish { + u64 gctx_paddr; + u64 id_block_paddr; + u64 id_auth_paddr; + u8 id_block_en:1; + u8 auth_key_en:1; + u64 rsvd:62; + u8 host_data[32]; +} __packed; + +/** + * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params + * + * @gctx_paddr: system physical address of guest context page + * @address: system physical address of guest status page + */ +struct sev_data_snp_guest_status { + u64 gctx_paddr; + u64 address; +} __packed; + +/** + * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params + * + * @paddr: system physical address of page to be claimed. The 0th bit in the + * address indicates the page size. 0h indicates 4KB and 1h indicates + * 2MB page. + */ +struct sev_data_snp_page_reclaim { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params + * + * @paddr: system physical address of page to be unsmashed. The 0th bit in the + * address indicates the page size. 0h indicates 4 KB and 1h indicates + * 2 MB page. + */ +struct sev_data_snp_page_unsmash { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters + * + * @gctx_paddr: system physical address of guest context page + * @src_addr: source address of data to operate on + * @dst_addr: destination address of data to operate on + */ +struct sev_data_snp_dbg { + u64 gctx_paddr; /* In */ + u64 src_addr; /* In */ + u64 dst_addr; /* In */ +} __packed; + +/** + * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params + * + * @gctx_paddr: system physical address of guest context page + * @req_paddr: system physical address of request page + * @res_paddr: system physical address of response page + */ +struct sev_data_snp_guest_request { + u64 gctx_paddr; /* In */ + u64 req_paddr; /* In */ + u64 res_paddr; /* In */ +} __packed; + +/** + * struct sev_data_snp_init_ex - SNP_INIT_EX structure + * + * @init_rmp: indicate that the RMP should be initialized. + * @list_paddr_en: indicate that list_paddr is valid + * @rsvd: reserved + * @rsvd1: reserved + * @list_paddr: system physical address of range list + * @rsvd2: reserved + */ +struct sev_data_snp_init_ex { + u32 init_rmp:1; + u32 list_paddr_en:1; + u32 rsvd:30; + u32 rsvd1; + u64 list_paddr; + u8 rsvd2[48]; +} __packed; + +/** + * struct sev_data_range - RANGE structure + * + * @base: system physical address of first byte of range + * @page_count: number of 4KB pages in this range + * @rsvd: reserved + */ +struct sev_data_range { + u64 base; + u32 page_count; + u32 rsvd; +} __packed; + +/** + * struct sev_data_range_list - RANGE_LIST structure + * + * @num_elements: number of elements in RANGE_ARRAY + * @rsvd: reserved + * @ranges: array of num_elements of type RANGE + */ +struct sev_data_range_list { + u32 num_elements; + u32 rsvd; + struct sev_data_range ranges[]; +} __packed; + +/** + * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure + * + * @len: length of the command buffer read by the PSP + * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU + * @rsvd1: reserved + */ +struct sev_data_snp_shutdown_ex { + u32 len; + u32 iommu_snp_shutdown:1; + u32 rsvd1:31; +} __packed; + +/** + * struct sev_platform_init_args + * + * @error: SEV firmware error code + * @probe: True if this is being called as part of CCP module probe, which + * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed + * unless psp_init_on_probe module param is set + */ +struct sev_platform_init_args { + int error; + bool probe; +}; + +/** + * struct sev_data_snp_commit - SNP_COMMIT structure + * + * @len: length of the command buffer read by the PSP + */ +struct sev_data_snp_commit { + u32 len; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** * sev_platform_init - perform SEV INIT command * - * @error: SEV command return code + * @args: struct sev_platform_init_args to pass in arguments * * Returns: * 0 if the SEV successfully processed the command @@ -537,7 +824,7 @@ struct sev_data_attestation_report { * -%ETIMEDOUT if the SEV command timed out * -%EIO if the SEV returned a non-zero return code */ -int sev_platform_init(int *error); +int sev_platform_init(struct sev_platform_init_args *args); /** * sev_platform_status - perform SEV PLATFORM_STATUS command @@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); +/** + * sev_do_cmd - issue an SEV or an SEV-SNP command + * + * @cmd: SEV or SEV-SNP firmware command to issue + * @data: arguments for firmware command + * @psp_ret: SEV command return code + * + * Returns: + * 0 if the SEV device successfully processed the command + * -%ENODEV if the PSP device is not available + * -%ENOTSUPP if PSP device does not support SEV + * -%ETIMEDOUT if the SEV command timed out + * -%EIO if PSP device returned a non-zero return code + */ +int sev_do_cmd(int cmd, void *data, int *psp_ret); + void *psp_copy_user_blob(u64 uaddr, u32 len); +void *snp_alloc_firmware_page(gfp_t mask); +void snp_free_firmware_page(void *addr); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; } -static inline int sev_platform_init(int *error) { return -ENODEV; } +static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; } static inline int sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; } @@ -653,6 +958,9 @@ static inline int sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; } static inline int +sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; } + +static inline int sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; } static inline int sev_guest_df_flush(int *error) { return -ENODEV; } @@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); } +static inline void *snp_alloc_firmware_page(gfp_t mask) +{ + return NULL; +} + +static inline void snp_free_firmware_page(void *addr) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 2a3a95586425..8dbd51ea8626 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -18,6 +18,16 @@ struct ptdump_state { const struct ptdump_range *range; }; +bool ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, + bool checkwx, bool dmesg); void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd); +bool ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} #endif /* _LINUX_PTDUMP_H */ diff --git a/include/linux/pti.h b/include/linux/pti.h index 1a941efcaa62..1fbf9d6c20ef 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -2,7 +2,7 @@ #ifndef _INCLUDE_PTI_H #define _INCLUDE_PTI_H -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include <asm/pti.h> #else static inline void pti_init(void) { } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 1ef4e0f9bd2a..6e4b8206c7d0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -200,6 +200,7 @@ struct ptp_clock; enum ptp_clock_events { PTP_CLOCK_ALARM, PTP_CLOCK_EXTTS, + PTP_CLOCK_EXTOFF, PTP_CLOCK_PPS, PTP_CLOCK_PPSUSR, }; @@ -210,6 +211,7 @@ enum ptp_clock_events { * @type: One of the ptp_clock_events enumeration values. * @index: Identifies the source of the event. * @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only). + * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only). * @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only). */ @@ -218,6 +220,7 @@ struct ptp_clock_event { int index; union { u64 timestamp; + s64 offset; struct pps_event_time pps_times; }; }; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 746fd67c3480..e8c74fa3f455 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,15 +8,15 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include <linux/clocksource_ids.h> #include <linux/types.h> struct timespec64; -struct clocksource; int kvm_arch_ptp_init(void); void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs); + struct timespec64 *tspec, enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ diff --git a/include/linux/ptp_mock.h b/include/linux/ptp_mock.h new file mode 100644 index 000000000000..72eb401034d9 --- /dev/null +++ b/include/linux/ptp_mock.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Mock-up PTP Hardware Clock driver for virtual network devices + * + * Copyright 2023 NXP + */ + +#ifndef _PTP_MOCK_H_ +#define _PTP_MOCK_H_ + +struct device; +struct mock_phc; + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_MOCK) + +struct mock_phc *mock_phc_create(struct device *dev); +void mock_phc_destroy(struct mock_phc *phc); +int mock_phc_index(struct mock_phc *phc); + +#else + +static inline struct mock_phc *mock_phc_create(struct device *dev) +{ + return NULL; +} + +static inline void mock_phc_destroy(struct mock_phc *phc) +{ +} + +static inline int mock_phc_index(struct mock_phc *phc) +{ + return -1; +} + +#endif + +#endif /* _PTP_MOCK_H_ */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec22..90507d4afcd6 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 04ae1d9073a7..4a6568dfdf3f 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* @@ -69,9 +69,7 @@ struct pwm_state { * @label: name of the PWM device * @flags: flags associated with the PWM device * @hwpwm: per-chip relative index of the PWM device - * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -80,9 +78,7 @@ struct pwm_device { const char *label; unsigned long flags; unsigned int hwpwm; - unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -95,8 +91,8 @@ struct pwm_device { * @state: state to fill with the current PWM state * * The returned PWM state represents the state that was applied by a previous call to - * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to - * hardware. If pwm_apply_state() was never called, this returns either the current hardware + * pwm_apply_might_sleep(). Drivers may have to slightly tweak that state before programming it to + * hardware. If pwm_apply_might_sleep() was never called, this returns either the current hardware * state (if supported) or the default settings. */ static inline void pwm_get_state(const struct pwm_device *pwm, @@ -114,12 +110,6 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm) return state.enabled; } -static inline void pwm_set_period(struct pwm_device *pwm, u64 period) -{ - if (pwm) - pwm->state.period = period; -} - static inline u64 pwm_get_period(const struct pwm_device *pwm) { struct pwm_state state; @@ -129,12 +119,6 @@ static inline u64 pwm_get_period(const struct pwm_device *pwm) return state.period; } -static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) -{ - if (pwm) - pwm->state.duty_cycle = duty; -} - static inline u64 pwm_get_duty_cycle(const struct pwm_device *pwm) { struct pwm_state state; @@ -160,20 +144,20 @@ static inline void pwm_get_args(const struct pwm_device *pwm, } /** - * pwm_init_state() - prepare a new state to be applied with pwm_apply_state() + * pwm_init_state() - prepare a new state to be applied with pwm_apply_might_sleep() * @pwm: PWM device * @state: state to fill with the prepared PWM state * * This functions prepares a state that can later be tweaked and applied - * to the PWM device with pwm_apply_state(). This is a convenient function + * to the PWM device with pwm_apply_might_sleep(). This is a convenient function * that first retrieves the current PWM state and the replaces the period * and polarity fields with the reference values defined in pwm->args. * Once the function returns, you can adjust the ->enabled and ->duty_cycle - * fields according to your needs before calling pwm_apply_state(). + * fields according to your needs before calling pwm_apply_might_sleep(). * * ->duty_cycle is initially set to zero to avoid cases where the current * ->duty_cycle value exceed the pwm_args->period one, which would trigger - * an error if the user calls pwm_apply_state() without adjusting ->duty_cycle + * an error if the user calls pwm_apply_might_sleep() without adjusting ->duty_cycle * first. */ static inline void pwm_init_state(const struct pwm_device *pwm, @@ -229,7 +213,7 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * * pwm_init_state(pwm, &state); * pwm_set_relative_duty_cycle(&state, 50, 100); - * pwm_apply_state(pwm, &state); + * pwm_apply_might_sleep(pwm, &state); * * This functions returns -EINVAL if @duty_cycle and/or @scale are * inconsistent (@scale == 0 or @duty_cycle > @scale). @@ -267,7 +251,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,38 +261,63 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller - * @base: number of first PWM controlled by this chip + * @owner: module providing this chip + * @id: unique number of this PWM chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier - * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier - * @list: list node for internal use + * @atomic: can the driver's ->apply() be called in atomic context + * @driver_data: Private pointer for driver specific info * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { struct device *dev; const struct pwm_ops *ops; - int base; + struct module *owner; + unsigned int id; unsigned int npwm; - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, + struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); - unsigned int of_pwm_n_cells; + bool atomic; /* only used internally by the PWM framework */ - struct list_head list; + void *driver_data; struct pwm_device *pwms; }; +static inline struct device *pwmchip_parent(const struct pwm_chip *chip) +{ + return chip->dev; +} + +static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +{ + /* + * After pwm_chip got a dedicated struct device, this can be replaced by + * dev_get_drvdata(&chip->dev); + */ + return chip->driver_data; +} + +static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) +{ + /* + * After pwm_chip got a dedicated struct device, this can be replaced by + * dev_set_drvdata(&chip->dev, data); + */ + chip->driver_data = data; +} + #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ -int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** @@ -337,7 +345,7 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, state.duty_cycle = duty_ns; state.period = period_ns; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -358,7 +366,7 @@ static inline int pwm_enable(struct pwm_device *pwm) return 0; state.enabled = true; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -377,27 +385,42 @@ static inline void pwm_disable(struct pwm_device *pwm) return; state.enabled = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); +} + +/** + * pwm_might_sleep() - is pwm_apply_atomic() supported? + * @pwm: PWM device + * + * Returns: false if pwm_apply_atomic() can be called from atomic context. + */ +static inline bool pwm_might_sleep(struct pwm_device *pwm) +{ + return !pwm->chip->atomic; } /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +void pwmchip_put(struct pwm_chip *chip); +struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); +struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); + +int __pwmchip_add(struct pwm_chip *chip, struct module *owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); -struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args); -struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); @@ -408,16 +431,27 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id); #else -static inline int pwm_apply_state(struct pwm_device *pwm, - const struct pwm_state *state) +static inline bool pwm_might_sleep(struct pwm_device *pwm) +{ + return true; +} + +static inline int pwm_apply_might_sleep(struct pwm_device *pwm, + const struct pwm_state *state) { might_sleep(); - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static inline int pwm_apply_atomic(struct pwm_device *pwm, + const struct pwm_state *state) +{ + return -EOPNOTSUPP; } static inline int pwm_adjust_config(struct pwm_device *pwm) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pwm_config(struct pwm_device *pwm, int duty_ns, @@ -445,14 +479,22 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) +static inline void pwmchip_put(struct pwm_chip *chip) { - return -EINVAL; } -static inline void *pwm_get_chip_data(struct pwm_device *pwm) +static inline struct pwm_chip *pwmchip_alloc(struct device *parent, + unsigned int npwm, + size_t sizeof_priv) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct pwm_chip *devm_pwmchip_alloc(struct device *parent, + unsigned int npwm, + size_t sizeof_priv) { - return NULL; + return pwmchip_alloc(parent, npwm, sizeof_priv); } static inline int pwmchip_add(struct pwm_chip *chip) @@ -536,7 +578,14 @@ static inline void pwm_apply_args(struct pwm_device *pwm) state.period = pwm->args.period; state.usage_power = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); +} + +/* only for backwards-compatibility, new code should not use this */ +static inline int pwm_apply_state(struct pwm_device *pwm, + const struct pwm_state *state) +{ + return pwm_apply_might_sleep(pwm, state); } struct pwm_lookup { diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 90e3045b2dcb..0d3b6ed21628 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -67,9 +67,6 @@ struct qed_fcoe_cb_ops { u32 (*get_login_failures)(void *cookie); }; -void qed_fcoe_set_pf_params(struct qed_dev *cdev, - struct qed_fcoe_pf_params *params); - /** * struct qed_fcoe_ops - qed FCoE operations. * @common: common operations pointer diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..06cc8888199e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } @@ -74,7 +74,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags); int dquot_alloc_inode(struct inode *inode); -int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); +void dquot_claim_space_nodirty(struct inode *inode, qsize_t number); void dquot_free_inode(struct inode *inode); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number); @@ -257,10 +257,9 @@ static inline void __dquot_free_space(struct inode *inode, qsize_t number, inode_sub_bytes(inode, number); } -static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) +static inline void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { inode_add_bytes(inode, number); - return 0; } static inline int dquot_reclaim_space_nodirty(struct inode *inode, @@ -358,14 +357,10 @@ static inline int dquot_reserve_block(struct inode *inode, qsize_t nr) DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE); } -static inline int dquot_claim_block(struct inode *inode, qsize_t nr) +static inline void dquot_claim_block(struct inode *inode, qsize_t nr) { - int ret; - - ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); - if (!ret) - mark_inode_dirty_sync(inode); - return ret; + dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); + mark_inode_dirty_sync(inode); } static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr) diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index f29aaaf2eb21..98030accf641 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; -extern const struct raid6_calls raid6_intx16; -extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; @@ -108,6 +106,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -123,6 +123,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; +extern const struct raid6_recov_calls raid6_recov_lsx; +extern const struct raid6_recov_calls raid6_recov_lasx; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e..6d92b68efbf6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) diff --git a/include/linux/range.h b/include/linux/range.h index 7efb6a9b069b..6ad0b73cb7ad 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -31,12 +31,4 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); -#define MAX_RESOURCE ((resource_size_t)~0) -static inline resource_size_t cap_resource(u64 val) -{ - if (val > MAX_RESOURCE) - return MAX_RESOURCE; - - return val; -} #endif diff --git a/include/linux/ras.h b/include/linux/ras.h index 1f4048bf2674..a64182bc72ad 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); + #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -35,4 +36,21 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline void amd_retire_dram_row(struct atl_err *err) { } +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 7ee7ed5de722..6dbc5a1bf6a8 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node, rb_insert_augmented(node, &root->rb_root, augment); } +static __always_inline struct rb_node * +rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree, + bool (*less)(struct rb_node *, const struct rb_node *), + const struct rb_augment_callbacks *augment) +{ + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + bool leftmost = true; + + while (*link) { + parent = *link; + if (less(node, parent)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(node, parent, link); + augment->propagate(parent, NULL); /* suboptimal */ + rb_insert_augmented_cached(node, tree, leftmost, augment); + + return leftmost ? node : NULL; +} + /* * Template for declaring augmented rbtree callbacks (generic case) * diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h new file mode 100644 index 000000000000..5640f024773b --- /dev/null +++ b/include/linux/rcu_notifier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update notifiers, initially RCU CPU stall notifier. + * Separate from rcupdate.h to avoid #include loops. + * + * Copyright (C) 2023 Paul E. McKenney. + */ + +#ifndef __LINUX_RCU_NOTIFIER_H +#define __LINUX_RCU_NOTIFIER_H + +// Actions for RCU CPU stall notifier calls. +#define RCU_STALL_NOTIFY_NORM 1 +#define RCU_STALL_NOTIFY_EXP 2 + +#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +#include <linux/notifier.h> +#include <linux/types.h> + +int rcu_stall_chain_notifier_register(struct notifier_block *n); +int rcu_stall_chain_notifier_unregister(struct notifier_block *n); + +#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +// No RCU CPU stall warnings in Tiny RCU. +static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } +static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } + +#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) + +#endif /* __LINUX_RCU_NOTIFIER_H */ diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 0027d4c8087c..3860dbb9107a 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *); -extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d29740be4833..3dc1e58865f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, }) /** - * list_next_or_null_rcu - get the first element from a list + * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index ba4c00dd8005..89186c499dd4 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, { struct hlist_nulls_node *first = h->first; - n->next = first; + WRITE_ONCE(n->next, first); WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) @@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, last = i; if (last) { - n->next = last->next; + WRITE_ONCE(n->next, last->next); n->pprev = &last->next; rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e5f920ade90..17d7ed5f3ae6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,9 +34,6 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) -#define ulong2long(a) (*(long *)(&(a))) -#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) -#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); @@ -122,8 +119,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(unsigned int cpu); -void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); @@ -189,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ @@ -252,6 +247,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -303,6 +329,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } +static inline void rcu_try_lock_acquire(struct lockdep_map *map) +{ + lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_); +} + static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); @@ -317,6 +348,7 @@ int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) +# define rcu_try_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 9bc8cbb33340..eda493200663 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void) void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); void synchronize_rcu_tasks_trace(void); void rcu_barrier_tasks_trace(void); +struct task_struct *get_rcu_tasks_trace_gp_kthread(void); #else /* * The BPF JIT forms these addresses even when it doesn't call these diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 699b938358bf..d07f0848802e 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -8,6 +8,7 @@ #include <linux/rcupdate.h> #include <linux/completion.h> +#include <linux/sched.h> /* * Structure allowing asynchronous waiting on RCU. @@ -42,6 +43,11 @@ do { \ * call_srcu() function, with this wrapper supplying the pointer to the * corresponding srcu_struct. * + * Note that call_rcu_hurry() should be used instead of call_rcu() + * because in kernels built with CONFIG_RCU_LAZY=y the delay between the + * invocation of call_rcu() and that of the corresponding RCU callback + * can be multiple seconds. + * * The first argument tells Tiny RCU's _wait_rcu_gp() not to * bother waiting for RCU. The reason for this is because anywhere * synchronize_rcu_mult() can be called is automatically already a full @@ -50,4 +56,13 @@ do { \ #define synchronize_rcu_mult(...) \ _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) +static inline void cond_resched_rcu(void) +{ +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) + rcu_read_unlock(); + cond_resched(); + rcu_read_lock(); +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7f17acf29dda..d9ac7b136aea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -138,6 +138,8 @@ static inline int rcu_needs_cpu(void) return 0; } +static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } + /* * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. @@ -169,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); } #define rcutree_offline_cpu NULL #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL -static inline void rcu_cpu_starting(unsigned int cpu) { } +static inline void rcutree_report_cpu_starting(unsigned int cpu) { } #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 56bccb5a8fde..254244202ea9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -21,6 +21,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); +void rcu_request_urgent_qs_task(struct task_struct *t); /* * Note a virtualization-based context switch. This is simply a @@ -36,7 +37,6 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); @@ -110,9 +110,21 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -int rcutree_offline_cpu(unsigned int cpu); +void rcutree_report_cpu_starting(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +int rcutree_offline_cpu(unsigned int cpu); +#else +#define rcutree_dead_cpu NULL +#define rcutree_dying_cpu NULL +#define rcutree_offline_cpu NULL +#endif + +void rcutree_migrate_callbacks(int cpu); + +/* Called from hotplug and also arm64 early secondary boot failure */ +void rcutree_report_cpu_dead(void); #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 2b6bb593be5b..abcdde4df697 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -129,11 +129,14 @@ enum sys_off_mode { * @cb_data: User's callback data. * @cmd: Command string. Currently used only by the sys-off restart mode, * NULL otherwise. + * @dev: Device of the sys-off handler. Only if known (devm_register_*), + * NULL otherwise. */ struct sys_off_data { int mode; void *cb_data; const char *cmd; + struct device *dev; }; struct sys_off_handler * @@ -174,7 +177,17 @@ void ctrl_alt_del(void); extern void orderly_poweroff(bool force); extern void orderly_reboot(void); -void hw_protection_shutdown(const char *reason, int ms_until_forced); +void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown); + +static inline void hw_protection_reboot(const char *reason, int ms_until_forced) +{ + __hw_protection_shutdown(reason, ms_until_forced, false); +} + +static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) +{ + __hw_protection_shutdown(reason, ms_until_forced, true); +} /* * Emergency restart, callable from an interrupt handler. diff --git a/include/linux/refcount.h b/include/linux/refcount.h index a62fcca97486..59b3b752394d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -96,22 +96,11 @@ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/limits.h> +#include <linux/refcount_types.h> #include <linux/spinlock_types.h> struct mutex; -/** - * typedef refcount_t - variant of atomic_t specialized for reference counts - * @refs: atomic_t counter field - * - * The counter saturates at REFCOUNT_SATURATED and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free bugs. - */ -typedef struct refcount_struct { - atomic_t refs; -} refcount_t; - #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } #define REFCOUNT_MAX INT_MAX #define REFCOUNT_SATURATED (INT_MIN / 2) @@ -147,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -188,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } -static inline void __refcount_add(int i, refcount_t *r, int *oldp) +static inline __signed_wrap +void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -267,7 +258,8 @@ static inline void refcount_inc(refcount_t *r) __refcount_inc(r, NULL); } -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); diff --git a/include/linux/refcount_types.h b/include/linux/refcount_types.h new file mode 100644 index 000000000000..162004f06edf --- /dev/null +++ b/include/linux/refcount_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_REFCOUNT_TYPES_H +#define _LINUX_REFCOUNT_TYPES_H + +#include <linux/types.h> + +/** + * typedef refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at REFCOUNT_SATURATED and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#endif /* _LINUX_REFCOUNT_TYPES_H */ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0b3ebce44..d470303b1bbb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -332,6 +332,10 @@ typedef void (*regmap_unlock)(void *); * @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port * access can be distinguished. * @max_register: Optional, specifies the maximum valid register address. + * @max_register_is_0: Optional, specifies that zero value in @max_register + * should be taken into account. This is a workaround to + * apply handling of @max_register for regmap that contains + * only one register. * @wr_table: Optional, points to a struct regmap_access_table specifying * valid ranges for write access. * @rd_table: As above, for read access. @@ -422,6 +426,7 @@ struct regmap_config { bool io_port; unsigned int max_register; + bool max_register_is_0; const struct regmap_access_table *wr_table; const struct regmap_access_table *rd_table; const struct regmap_access_table *volatile_table; @@ -1225,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map, int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_noinc_read(struct regmap *map, unsigned int reg, @@ -1287,6 +1293,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min, void regcache_cache_only(struct regmap *map, bool enable); void regcache_cache_bypass(struct regmap *map, bool enable); void regcache_mark_dirty(struct regmap *map); +bool regcache_reg_cached(struct regmap *map, unsigned int reg); bool regmap_check_range_table(struct regmap *map, unsigned int reg, const struct regmap_access_table *table); @@ -1733,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg, + unsigned int *val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len) { diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 39b666b40ea6..ed180ca419da 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -33,6 +33,7 @@ #include <linux/err.h> #include <linux/suspend.h> +#include <regulator/regulator.h> struct device; struct notifier_block; @@ -85,52 +86,6 @@ struct regulator_dev; #define REGULATOR_MODE_STANDBY 0x8 /* - * Regulator notifier events. - * - * UNDER_VOLTAGE Regulator output is under voltage. - * OVER_CURRENT Regulator output current is too high. - * REGULATION_OUT Regulator output is out of regulation. - * FAIL Regulator output has failed. - * OVER_TEMP Regulator over temp. - * FORCE_DISABLE Regulator forcibly shut down by software. - * VOLTAGE_CHANGE Regulator voltage changed. - * Data passed is old voltage cast to (void *). - * DISABLE Regulator was disabled. - * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed. - * Data passed is "struct pre_voltage_change_data" - * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason. - * Data passed is old voltage cast to (void *). - * PRE_DISABLE Regulator is about to be disabled - * ABORT_DISABLE Regulator disable failed for some reason - * - * NOTE: These events can be OR'ed together when passed into handler. - */ - -#define REGULATOR_EVENT_UNDER_VOLTAGE 0x01 -#define REGULATOR_EVENT_OVER_CURRENT 0x02 -#define REGULATOR_EVENT_REGULATION_OUT 0x04 -#define REGULATOR_EVENT_FAIL 0x08 -#define REGULATOR_EVENT_OVER_TEMP 0x10 -#define REGULATOR_EVENT_FORCE_DISABLE 0x20 -#define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 -#define REGULATOR_EVENT_DISABLE 0x80 -#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 -#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 -#define REGULATOR_EVENT_PRE_DISABLE 0x400 -#define REGULATOR_EVENT_ABORT_DISABLE 0x800 -#define REGULATOR_EVENT_ENABLE 0x1000 -/* - * Following notifications should be emitted only if detected condition - * is such that the HW is likely to still be working but consumers should - * take a recovery action to prevent problems esacalating into errors. - */ -#define REGULATOR_EVENT_UNDER_VOLTAGE_WARN 0x2000 -#define REGULATOR_EVENT_OVER_CURRENT_WARN 0x4000 -#define REGULATOR_EVENT_OVER_VOLTAGE_WARN 0x8000 -#define REGULATOR_EVENT_OVER_TEMP_WARN 0x10000 -#define REGULATOR_EVENT_WARN_MASK 0x1E000 - -/* * Regulator errors that can be queried using regulator_get_error_flags * * UNDER_VOLTAGE Regulator output is under voltage. @@ -365,13 +320,13 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) static inline int devm_regulator_get_enable(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline int devm_regulator_get_enable_optional(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline struct regulator *__must_check diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h index f90df9ee703e..d58ff273157e 100644 --- a/include/linux/regulator/db8500-prcmu.h +++ b/include/linux/regulator/db8500-prcmu.h @@ -35,10 +35,4 @@ enum db8500_regulator_id { DB8500_NUM_REGULATORS }; -/* - * Exported interface for CPUIdle only. This function is called with all - * interrupts turned off. - */ -int power_state_active_is_enabled(void); - #endif diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index c6ef7d68eb9a..22a07c0900a4 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -51,12 +51,7 @@ enum regulator_detection_severity { /* Initialize struct linear_range for regulators */ #define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) \ -{ \ - .min = _min_uV, \ - .min_sel = _min_sel, \ - .max_sel = _max_sel, \ - .step = _step_uV, \ -} + LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) /** * struct regulator_ops - regulator operations. @@ -292,11 +287,12 @@ enum regulator_type { * @ramp_delay: Time to settle down after voltage change (unit: uV/us) * @min_dropout_uV: The minimum dropout voltage this regulator can handle * @linear_ranges: A constant table of possible voltage ranges. - * @linear_range_selectors: A constant table of voltage range selectors. - * If pickable ranges are used each range must - * have corresponding selector here. + * @linear_range_selectors_bitfield: A constant table of voltage range + * selectors as bitfield values. If + * pickable ranges are used each range + * must have corresponding selector here. * @n_linear_ranges: Number of entries in the @linear_ranges (and in - * linear_range_selectors if used) table(s). + * linear_range_selectors_bitfield if used) table(s). * @volt_table: Voltage mapping table (if table based mapping) * @curr_table: Current limit mapping table (if table based mapping) * @@ -384,7 +380,7 @@ struct regulator_desc { int min_dropout_uV; const struct linear_range *linear_ranges; - const unsigned int *linear_range_selectors; + const unsigned int *linear_range_selectors_bitfield; int n_linear_ranges; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 621b7f4a3639..0cd76d264727 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -49,6 +49,13 @@ struct regulator; #define DISABLE_IN_SUSPEND 1 #define ENABLE_IN_SUSPEND 2 +/* + * Default time window (in milliseconds) following a critical under-voltage + * event during which less critical actions can be safely carried out by the + * system. + */ +#define REGULATOR_DEF_UV_LESS_CRITICAL_WINDOW_MS 10 + /* Regulator active discharge flags */ enum regulator_active_discharge { REGULATOR_ACTIVE_DISCHARGE_DEFAULT, @@ -127,6 +134,8 @@ struct notification_limit { * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. + * @system_critical: Set if the regulator is critical to system stability or + * functionality. * @over_current_protection: Auto disable on over current event. * * @over_current_detection: Configure over current limits. @@ -153,6 +162,13 @@ struct notification_limit { * regulator_active_discharge values are used for * initialisation. * @enable_time: Turn-on time of the rails (unit: microseconds) + * @uv_less_critical_window_ms: Specifies the time window (in milliseconds) + * following a critical under-voltage (UV) event + * during which less critical actions can be + * safely carried out by the system (for example + * logging). After this time window more critical + * actions should be done (for example prevent + * HW damage). */ struct regulation_constraints { @@ -204,6 +220,7 @@ struct regulation_constraints { unsigned int settling_time_up; unsigned int settling_time_down; unsigned int enable_time; + unsigned int uv_less_critical_window_ms; unsigned int active_discharge; @@ -214,6 +231,7 @@ struct regulation_constraints { unsigned ramp_disable:1; /* disable ramp delay */ unsigned soft_start:1; /* ramp voltage slowly */ unsigned pull_down:1; /* pull down resistor when regulator off */ + unsigned system_critical:1; /* critical to system stability */ unsigned over_current_protection:1; /* auto disable on over current */ unsigned over_current_detection:1; /* notify on over current */ unsigned over_voltage_detection:1; /* notify on over voltage */ diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index 8313e7ed6aec..a225e9eeb30d 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -48,10 +48,6 @@ * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of * device register. - * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host - * then GPIO number can be provided. If no GPIO controlled then - * it should be -1. - * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic. * @dvs_def_state: Default state of dvs. 1 if it is high else 0. */ struct max8973_regulator_platform_data { @@ -59,8 +55,6 @@ struct max8973_regulator_platform_data { unsigned long control_flags; unsigned long junction_temp_warning; bool enable_ext_control; - int enable_gpio; - int dvs_gpio; unsigned dvs_def_state:1; }; diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index c71a6a9fce7a..562386f9b80e 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -86,6 +86,9 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, + MT6366_ID_VM18, + MT6366_ID_VMDDR, + MT6366_ID_VSRAM_CORE, MT6366_ID_RG_MAX, }; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index fe8978eb69f1..b4795698d8c2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); + +/* from remoteproc_coredump.c */ +void rproc_coredump_cleanup(struct rproc *rproc); +void rproc_coredump(struct rproc *rproc); void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8334eeacfec5..a365f67131ec 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,12 @@ #include <linux/list.h> #include <linux/pid.h> +/* CLOSID, RMID value used by the default control group */ +#define RESCTRL_RESERVED_CLOSID 0 +#define RESCTRL_RESERVED_RMID 0 + +#define RESCTRL_PICK_ANY_CPU -1 + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, @@ -94,7 +100,7 @@ struct rdt_domain { * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +108,7 @@ struct resctrl_cache { unsigned int cbm_len; unsigned int min_cbm_bits; unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; + bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; }; @@ -153,7 +159,7 @@ struct resctrl_schema; * @cache_level: Which cache level defines scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: All domains for this resource + * @domains: RCU list of all domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_online_cpu(unsigned int cpu); +void resctrl_offline_cpu(unsigned int cpu); /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. * @d: domain that the counter should be read from. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid + * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. * @val: result of the counter read in bytes. + * @arch_mon_ctx: An architecture specific value from + * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies + * the hardware monitor allocated for this read request. * - * Call from process context on a CPU that belongs to domain @d. + * Some architectures need to sleep when first programming some of the counters. + * (specifically: arm64's MPAM cache occupancy counters can return 'not ready' + * for a short period of time). Call from a non-migrateable process context on + * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or + * schedule_work_on(). This function can be called with interrupts masked, + * e.g. using smp_call_function_any(), but may consistently return an error. * * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid, u64 *val); + u32 closid, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *arch_mon_ctx); + +/** + * resctrl_arch_rmid_read_context_check() - warn about invalid contexts + * + * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when + * resctrl_arch_rmid_read() is called with preemption disabled. + * + * The contract with resctrl_arch_rmid_read() is that if interrupts + * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an + * IPI, (and fail if the call needed to sleep), while most of the time + * the work is scheduled, allowing the call to sleep. + */ +static inline void resctrl_arch_rmid_read_context_check(void) +{ + if (!irqs_disabled()) + might_sleep(); +} /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. * @d: The rmid's domain. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid only. * @rmid: The rmid whose counter values should be reset. * @eventid: The eventid whose counter values should be reset. * * This can be called from any CPU. */ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid); + u32 closid, u32 rmid, + enum resctrl_event_id eventid); /** * resctrl_arch_reset_rmid_all() - Reset all private state associated with diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 0fa4f60e1186..357df16ede32 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -60,6 +60,9 @@ struct reset_control_lookup { * @reset_control_head: head of internal list of requested reset controls * @dev: corresponding driver model device struct * @of_node: corresponding device tree node as phandle target + * @of_args: for reset-gpios controllers: corresponding phandle args with + * of_node and GPIO number complementing of_node; either this or + * of_node should be present * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the * device tree to id as given to the reset control ops, defaults @@ -73,6 +76,7 @@ struct reset_controller_dev { struct list_head reset_control_head; struct device *dev; struct device_node *of_node; + const struct of_phandle_args *of_args; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, const struct of_phandle_args *reset_spec); diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 980a65594412..13f17676c5f4 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -7,8 +7,8 @@ #include <linux/compiler.h> #include <linux/types.h> -#include <linux/time64.h> +struct __kernel_timespec; struct timespec; struct old_timespec32; struct pollfd; diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index 285189454449..e0135e0adae0 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/task_work.h> #include <linux/memcontrol.h> +#include <linux/rseq.h> #include <linux/blk-cgroup.h> /** @@ -55,7 +56,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) } #endif - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(GFP_KERNEL); blkcg_maybe_throttle_current(); rseq_handle_notify_resume(NULL, regs); diff --git a/include/linux/rethook.h b/include/linux/rethook.h index 26b6f3c81a76..ba60962805f6 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -6,11 +6,10 @@ #define _LINUX_RETHOOK_H #include <linux/compiler.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/kallsyms.h> #include <linux/llist.h> #include <linux/rcupdate.h> -#include <linux/refcount.h> struct rethook_node; @@ -29,15 +28,18 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, */ struct rethook { void *data; - rethook_handler_t handler; - struct freelist_head pool; - refcount_t ref; + /* + * To avoid sparse warnings, this uses a raw function pointer with + * __rcu, instead of rethook_handler_t. But this must be same as + * rethook_handler_t. + */ + void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *); + struct objpool_head pool; struct rcu_head rcu; }; /** * struct rethook_node - The rethook shadow-stack entry node. - * @freelist: The freelist, linked to struct rethook::pool. * @rcu: The rcu_head for deferred freeing. * @llist: The llist, linked to a struct task_struct::rethooks. * @rethook: The pointer to the struct rethook. @@ -48,20 +50,16 @@ struct rethook { * on each entry of the shadow stack. */ struct rethook_node { - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct rethook *rethook; unsigned long ret_addr; unsigned long frame; }; -struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num); void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); -void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); void rethook_recycle(struct rethook_node *node); void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); @@ -98,4 +96,3 @@ void rethook_flush_task(struct task_struct *tk); #endif #endif - diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 57467cbf4c5b..b6f3797277ff 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -12,7 +12,7 @@ #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/mutex.h> -#include <linux/workqueue.h> +#include <linux/workqueue_types.h> struct rhash_head { struct rhash_head __rcu *next; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 782e14f62201..dc5ae4e96aee 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -98,7 +98,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); +typedef bool (*ring_buffer_cond_fn)(void *data); +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, + ring_buffer_cond_fn cond, void *data); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); @@ -141,6 +143,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter); bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter); unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); +unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer); void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_reset_online_cpus(struct trace_buffer *buffer); @@ -191,15 +194,24 @@ bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); -void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data); -int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page, +struct buffer_data_read_page; +struct buffer_data_read_page * +ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu); +void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, + struct buffer_data_read_page *page); +int ring_buffer_read_page(struct trace_buffer *buffer, + struct buffer_data_read_page *data_page, size_t len, int cpu, int full); +void *ring_buffer_read_page_data(struct buffer_data_read_page *page); struct trace_seq; int ring_buffer_print_entry_header(struct trace_seq *s); -int ring_buffer_print_page_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s); + +int ring_buffer_subbuf_order_get(struct trace_buffer *buffer); +int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order); +int ring_buffer_subbuf_size_get(struct trace_buffer *buffer); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b87d01660412..b7944a833668 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -121,6 +121,11 @@ static inline void anon_vma_lock_write(struct anon_vma *anon_vma) down_write(&anon_vma->root->rwsem); } +static inline int anon_vma_trylock_write(struct anon_vma *anon_vma) +{ + return down_write_trylock(&anon_vma->root->rwsem); +} + static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { up_write(&anon_vma->root->rwsem); @@ -172,131 +177,323 @@ struct anon_vma *folio_get_anon_vma(struct folio *folio); typedef int __bitwise rmap_t; /* - * No special request: if the page is a subpage of a compound page, it is - * mapped via a PTE. The mapped (sub)page is possibly shared between processes. + * No special request: A mapped anonymous (sub)page is possibly shared between + * processes. */ #define RMAP_NONE ((__force rmap_t)0) -/* The (sub)page is exclusive to a single process. */ +/* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) /* - * The compound page is not mapped via PTEs, but instead via a single PMD and - * should be accounted accordingly. + * Internally, we're using an enum to specify the granularity. We make the + * compiler emit specialized code for each granularity. */ -#define RMAP_COMPOUND ((__force rmap_t)BIT(1)) +enum rmap_level { + RMAP_LEVEL_PTE = 0, + RMAP_LEVEL_PMD, +}; + +static inline void __folio_rmap_sanity_checks(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) +{ + /* hugetlb folios are handled separately. */ + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + + /* + * TODO: we get driver-allocated folios that have nothing to do with + * the rmap using vm_insert_page(); therefore, we cannot assume that + * folio_test_large_rmappable() holds for large folios. We should + * handle any desired mapcount+stats accounting for these folios in + * VM_MIXEDMAP VMAs separately, and then sanity-check here that + * we really only get rmappable folios. + */ + + VM_WARN_ON_ONCE(nr_pages <= 0); + VM_WARN_ON_FOLIO(page_folio(page) != folio, folio); + VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); + + switch (level) { + case RMAP_LEVEL_PTE: + break; + case RMAP_LEVEL_PMD: + /* + * We don't support folios larger than a single PMD yet. So + * when RMAP_LEVEL_PMD is set, we assume that we are creating + * a single "entire" mapping of the folio. + */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); + break; + default: + VM_WARN_ON_ONCE(true); + } +} /* * rmap interfaces called when adding or removing pte of page */ -void page_move_anon_rmap(struct page *, struct vm_area_struct *); -void page_add_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long address, rmap_t flags); -void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, - unsigned long address); +void folio_move_anon_rmap(struct folio *, struct vm_area_struct *); +void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *, unsigned long address, rmap_t flags); +#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \ + folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags) +void folio_add_anon_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *, unsigned long address, rmap_t flags); void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); -void page_add_file_rmap(struct page *, struct vm_area_struct *, - bool compound); -void page_remove_rmap(struct page *, struct vm_area_struct *, - bool compound); - -void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, +void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *); +#define folio_add_file_rmap_pte(folio, page, vma) \ + folio_add_file_rmap_ptes(folio, page, 1, vma) +void folio_add_file_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *); +void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, + struct vm_area_struct *); +#define folio_remove_rmap_pte(folio, page, vma) \ + folio_remove_rmap_ptes(folio, page, 1, vma) +void folio_remove_rmap_pmd(struct folio *, struct page *, + struct vm_area_struct *); + +void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); -void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, +void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); -static inline void __page_dup_rmap(struct page *page, bool compound) +/* See folio_try_dup_anon_rmap_*() */ +static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, + struct vm_area_struct *vma) { - if (compound) { - struct folio *folio = (struct folio *)page; + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); - VM_BUG_ON_PAGE(compound && !PageHead(page), page); - atomic_inc(&folio->_entire_mapcount); - } else { - atomic_inc(&page->_mapcount); + if (PageAnonExclusive(&folio->page)) { + if (unlikely(folio_needs_cow_for_dma(vma, folio))) + return -EBUSY; + ClearPageAnonExclusive(&folio->page); } + atomic_inc(&folio->_entire_mapcount); + return 0; +} + +/* See folio_try_share_anon_rmap_*() */ +static inline int hugetlb_try_share_anon_rmap(struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio); + + /* Paired with the memory barrier in try_grab_folio(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb(); + + if (unlikely(folio_maybe_dma_pinned(folio))) + return -EBUSY; + ClearPageAnonExclusive(&folio->page); + + /* + * This is conceptually a smp_wmb() paired with the smp_rmb() in + * gup_must_unshare(). + */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb__after_atomic(); + return 0; +} + +static inline void hugetlb_add_file_rmap(struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); + + atomic_inc(&folio->_entire_mapcount); +} + +static inline void hugetlb_remove_rmap(struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + + atomic_dec(&folio->_entire_mapcount); } -static inline void page_dup_file_rmap(struct page *page, bool compound) +static __always_inline void __folio_dup_file_rmap(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) { - __page_dup_rmap(page, compound); + __folio_rmap_sanity_checks(folio, page, nr_pages, level); + + switch (level) { + case RMAP_LEVEL_PTE: + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: + atomic_inc(&folio->_entire_mapcount); + break; + } } /** - * page_try_dup_anon_rmap - try duplicating a mapping of an already mapped - * anonymous page - * @page: the page to duplicate the mapping for - * @compound: the page is mapped as compound or as a small page - * @vma: the source vma + * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio + * @folio: The folio to duplicate the mappings of + * @page: The first page to duplicate the mappings of + * @nr_pages: The number of pages of which the mapping will be duplicated * - * The caller needs to hold the PT lock and the vma->vma_mm->write_protect_seq. + * The page range of the folio is defined by [page, page + nr_pages) * - * Duplicating the mapping can only fail if the page may be pinned; device - * private pages cannot get pinned and consequently this function cannot fail. + * The caller needs to hold the page table lock. + */ +static inline void folio_dup_file_rmap_ptes(struct folio *folio, + struct page *page, int nr_pages) +{ + __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); +} +#define folio_dup_file_rmap_pte(folio, page) \ + folio_dup_file_rmap_ptes(folio, page, 1) + +/** + * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio + * @folio: The folio to duplicate the mapping of + * @page: The first page to duplicate the mapping of * - * If duplicating the mapping succeeds, the page has to be mapped R/O into - * the parent and the child. It must *not* get mapped writable after this call. + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * - * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. + * The caller needs to hold the page table lock. */ -static inline int page_try_dup_anon_rmap(struct page *page, bool compound, - struct vm_area_struct *vma) +static inline void folio_dup_file_rmap_pmd(struct folio *folio, + struct page *page) { - VM_BUG_ON_PAGE(!PageAnon(page), page); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); +#else + WARN_ON_ONCE(true); +#endif +} - /* - * No need to check+clear for already shared pages, including KSM - * pages. - */ - if (!PageAnonExclusive(page)) - goto dup; +static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *src_vma, + enum rmap_level level) +{ + bool maybe_pinned; + int i; + + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* - * If this page may have been pinned by the parent process, - * don't allow to duplicate the mapping but instead require to e.g., - * copy the page immediately for the child so that we'll always - * guarantee the pinned page won't be randomly replaced in the + * If this folio may have been pinned by the parent process, + * don't allow to duplicate the mappings but instead require to e.g., + * copy the subpage immediately for the child so that we'll always + * guarantee the pinned folio won't be randomly replaced in the * future on write faults. */ - if (likely(!is_device_private_page(page) && - unlikely(page_needs_cow_for_dma(vma, page)))) - return -EBUSY; + maybe_pinned = likely(!folio_is_device_private(folio)) && + unlikely(folio_needs_cow_for_dma(src_vma, folio)); - ClearPageAnonExclusive(page); /* - * It's okay to share the anon page between both processes, mapping - * the page R/O into both processes. + * No need to check+clear for already shared PTEs/PMDs of the + * folio. But if any page is PageAnonExclusive, we must fallback to + * copying if the folio maybe pinned. */ -dup: - __page_dup_rmap(page, compound); + switch (level) { + case RMAP_LEVEL_PTE: + if (unlikely(maybe_pinned)) { + for (i = 0; i < nr_pages; i++) + if (PageAnonExclusive(page + i)) + return -EBUSY; + } + do { + if (PageAnonExclusive(page)) + ClearPageAnonExclusive(page); + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: + if (PageAnonExclusive(page)) { + if (unlikely(maybe_pinned)) + return -EBUSY; + ClearPageAnonExclusive(page); + } + atomic_inc(&folio->_entire_mapcount); + break; + } return 0; } /** - * page_try_share_anon_rmap - try marking an exclusive anonymous page possibly - * shared to prepare for KSM or temporary unmapping - * @page: the exclusive anonymous page to try marking possibly shared + * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range + * of a folio + * @folio: The folio to duplicate the mappings of + * @page: The first page to duplicate the mappings of + * @nr_pages: The number of pages of which the mapping will be duplicated + * @src_vma: The vm area from which the mappings are duplicated * - * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated. + * The page range of the folio is defined by [page, page + nr_pages) * - * This is similar to page_try_dup_anon_rmap(), however, not used during fork() - * to duplicate a mapping, but instead to prepare for KSM or temporarily - * unmapping a page (swap, migration) via page_remove_rmap(). + * The caller needs to hold the page table lock and the + * vma->vma_mm->write_protect_seq. * - * Marking the page shared can only fail if the page may be pinned; device - * private pages cannot get pinned and consequently this function cannot fail. + * Duplicating the mappings can only fail if the folio may be pinned; device + * private folios cannot get pinned and consequently this function cannot fail + * for them. * - * Returns 0 if marking the page possibly shared succeeded. Returns -EBUSY - * otherwise. + * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in + * the parent and the child. They must *not* be writable after this call + * succeeded. + * + * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. + */ +static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *src_vma) +{ + return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, + RMAP_LEVEL_PTE); +} +#define folio_try_dup_anon_rmap_pte(folio, page, vma) \ + folio_try_dup_anon_rmap_ptes(folio, page, 1, vma) + +/** + * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range + * of a folio + * @folio: The folio to duplicate the mapping of + * @page: The first page to duplicate the mapping of + * @src_vma: The vm area from which the mapping is duplicated + * + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock and the + * vma->vma_mm->write_protect_seq. + * + * Duplicating the mapping can only fail if the folio may be pinned; device + * private folios cannot get pinned and consequently this function cannot fail + * for them. + * + * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in + * the parent and the child. They must *not* be writable after this call + * succeeded. + * + * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ -static inline int page_try_share_anon_rmap(struct page *page) +static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, + struct page *page, struct vm_area_struct *src_vma) { - VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, + RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); + return -EBUSY; +#endif +} + +static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level) +{ + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); + __folio_rmap_sanity_checks(folio, page, nr_pages, level); - /* device private pages cannot get pinned via GUP. */ - if (unlikely(is_device_private_page(page))) { + /* device private folios cannot get pinned via GUP. */ + if (unlikely(folio_is_device_private(folio))) { ClearPageAnonExclusive(page); return 0; } @@ -347,7 +544,7 @@ static inline int page_try_share_anon_rmap(struct page *page) if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb(); - if (unlikely(page_maybe_dma_pinned(page))) + if (unlikely(folio_maybe_dma_pinned(folio))) return -EBUSY; ClearPageAnonExclusive(page); @@ -360,6 +557,68 @@ static inline int page_try_share_anon_rmap(struct page *page) return 0; } +/** + * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page + * mapped by a PTE possibly shared to prepare + * for KSM or temporary unmapping + * @folio: The folio to share a mapping of + * @page: The mapped exclusive page + * + * The caller needs to hold the page table lock and has to have the page table + * entries cleared/invalidated. + * + * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during + * fork() to duplicate mappings, but instead to prepare for KSM or temporarily + * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte(). + * + * Marking the mapped page shared can only fail if the folio maybe pinned; + * device private folios cannot get pinned and consequently this function cannot + * fail. + * + * Returns 0 if marking the mapped page possibly shared succeeded. Returns + * -EBUSY otherwise. + */ +static inline int folio_try_share_anon_rmap_pte(struct folio *folio, + struct page *page) +{ + return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); +} + +/** + * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page + * range mapped by a PMD possibly shared to + * prepare for temporary unmapping + * @folio: The folio to share the mapping of + * @page: The first page to share the mapping of + * + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock and has to have the page table + * entries cleared/invalidated. + * + * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during + * fork() to duplicate a mapping, but instead to prepare for temporarily + * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd(). + * + * Marking the mapped pages shared can only fail if the folio maybe pinned; + * device private folios cannot get pinned and consequently this function cannot + * fail. + * + * Returns 0 if marking the mapped pages possibly shared succeeded. Returns + * -EBUSY otherwise. + */ +static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, + struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, + RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); + return -EBUSY; +#endif +} + /* * Called from mm/vmscan.c to handle paging out */ @@ -477,7 +736,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, #define anon_vma_init() do {} while (0) #define anon_vma_prepare(vma) (0) -#define anon_vma_link(vma) do {} while (0) static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 523c98b96cb4..90d8e4475f80 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -64,12 +64,14 @@ struct rpmsg_device { }; typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @flow_cb: remote flow control callback handler * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use @@ -92,6 +94,7 @@ struct rpmsg_endpoint { struct rpmsg_device *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + rpmsg_flowcontrol_cb_t flow_cb; struct mutex cb_lock; u32 addr; void *priv; @@ -106,6 +109,7 @@ struct rpmsg_endpoint { * @probe: invoked when a matching rpmsg channel (i.e. device) is found * @remove: invoked when the rpmsg channel is removed * @callback: invoked when an inbound message is received on the channel + * @flowcontrol: invoked when remote side flow control request is received */ struct rpmsg_driver { struct device_driver drv; @@ -113,6 +117,7 @@ struct rpmsg_driver { int (*probe)(struct rpmsg_device *dev); void (*remove)(struct rpmsg_device *dev); int (*callback)(struct rpmsg_device *, void *, int, void *, u32); + int (*flowcontrol)(struct rpmsg_device *, void *, bool); }; static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) @@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept); +int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst); + #else static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev, @@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept) return -ENXIO; } +static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ diff --git a/include/linux/rseq.h b/include/linux/rseq.h new file mode 100644 index 000000000000..bc8af3eb5598 --- /dev/null +++ b/include/linux/rseq.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _LINUX_RSEQ_H +#define _LINUX_RSEQ_H + +#ifdef CONFIG_RSEQ + +#include <linux/preempt.h> +#include <linux/sched.h> + +/* + * Map the event mask on the user-space ABI enum rseq_cs_flags + * for direct mask checks. + */ +enum rseq_event_mask_bits { + RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, + RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, + RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, +}; + +enum rseq_event_mask { + RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), + RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), + RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), +}; + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ + if (t->rseq) + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); +} + +void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); + +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) +{ + if (current->rseq) + __rseq_handle_notify_resume(ksig, regs); +} + +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) +{ + preempt_disable(); + __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); + preempt_enable(); + rseq_handle_notify_resume(ksig, regs); +} + +/* rseq_preempt() requires preemption to be disabled. */ +static inline void rseq_preempt(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* rseq_migrate() requires preemption to be disabled. */ +static inline void rseq_migrate(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* + * If parent process has a registered restartable sequences area, the + * child inherits. Unregister rseq for a clone with CLONE_VM set. + */ +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ + if (clone_flags & CLONE_VM) { + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; + } else { + t->rseq = current->rseq; + t->rseq_len = current->rseq_len; + t->rseq_sig = current->rseq_sig; + t->rseq_event_mask = current->rseq_event_mask; + } +} + +static inline void rseq_execve(struct task_struct *t) +{ + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; +} + +#else + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ +} +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) +{ +} +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) +{ +} +static inline void rseq_preempt(struct task_struct *t) +{ +} +static inline void rseq_migrate(struct task_struct *t) +{ +} +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ +} +static inline void rseq_execve(struct task_struct *t) +{ +} + +#endif + +#ifdef CONFIG_DEBUG_RSEQ + +void rseq_syscall(struct pt_regs *regs); + +#else + +static inline void rseq_syscall(struct pt_regs *regs) +{ +} + +#endif + +#endif /* _LINUX_RSEQ_H */ diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 238bb85243d3..a04dacbdc8ae 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -10,7 +10,6 @@ #ifndef _RSLIB_H_ #define _RSLIB_H_ -#include <linux/list.h> #include <linux/types.h> /* for gfp_t */ #include <linux/gfp.h> /* for GFP_KERNEL */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 1fd9c6a21ebe..3f4d315aaec9 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -42,7 +42,7 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) #include <linux/timerqueue.h> #include <linux/workqueue.h> -extern struct class *rtc_class; +extern const struct class rtc_class; /* * For these RTC methods the device parameter is the physical device @@ -146,6 +146,7 @@ struct rtc_device { time64_t range_min; timeu64_t range_max; + timeu64_t alarm_offset_max; time64_t start_secs; time64_t offset_secs; bool set_start_time; @@ -224,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc. + */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3d6cf306cd55..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -10,6 +10,13 @@ #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); + +static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, + u32 pid, u32 group, int echo) +{ + return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); +} + extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); @@ -40,6 +47,7 @@ extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; +extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; @@ -72,6 +80,18 @@ static inline bool lockdep_rtnl_is_held(void) #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) +/** + * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning + * its old value + * @rp: RCU pointer, whose value is returned + * @p: regular pointer + * + * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated + * pointer. The old value of @rp is returned, and @rp is set to @p + */ +#define rcu_replace_pointer_rtnl(rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); @@ -130,4 +150,28 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, extern void rtnl_offload_xstats_notify(struct net_device *dev); +static inline int rtnl_has_listeners(const struct net *net, u32 group) +{ + struct sock *rtnl = net->rtnl; + + return netlink_has_listeners(rtnl, group); +} + +/** + * rtnl_notify_needed - check if notification is needed + * @net: Pointer to the net namespace + * @nlflags: netlink ingress message flags + * @group: rtnl group + * + * Based on the ingress message flags and rtnl group, returns true + * if a notification is needed, false otherwise. + */ +static inline bool +rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) +{ + return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); +} + +void netdev_set_operstate(struct net_device *dev, int newstate); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 534038d962e4..4612ef09a0c7 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -60,6 +60,7 @@ #define SD_EXIST (1 << 16) #define DELINK_INT GPIO0_INT #define MS_OC_INT (1 << 23) +#define SD_OVP_INT (1 << 23) #define SD_OC_INT (1 << 22) #define CARD_INT (XD_INT | MS_INT | SD_INT) @@ -80,6 +81,7 @@ #define OC_INT_EN (1 << 23) #define DELINK_INT_EN GPIO0_INT_EN #define MS_OC_INT_EN (1 << 23) +#define SD_OVP_INT_EN (1 << 23) #define SD_OC_INT_EN (1 << 22) #define RTSX_DUM_REG 0x1C @@ -583,6 +585,7 @@ #define OBFF_DISABLE 0x00 #define CDRESUMECTL 0xFE52 +#define CDGW 0xFE53 #define WAKE_SEL_CTL 0xFE54 #define PCLK_CTL 0xFE55 #define PCLK_MODE_SEL 0x20 @@ -764,6 +767,9 @@ #define SD_VIO_LDO_1V8 0x40 #define SD_VIO_LDO_3V3 0x70 +#define RTS5264_AUTOLOAD_CFG2 0xFF7D +#define RTS5264_CHIP_RST_N_SEL (1 << 6) + #define RTS5260_AUTOLOAD_CFG4 0xFF7F #define RTS5260_MIMO_DISABLE 0x8A /*RTS5261*/ @@ -1261,6 +1267,7 @@ struct rtsx_pcr { u8 dma_error_count; u8 ocp_stat; u8 ocp_stat2; + u8 ovp_stat; u8 rtd3_en; }; @@ -1271,6 +1278,7 @@ struct rtsx_pcr { #define PID_5260 0x5260 #define PID_5261 0x5261 #define PID_5228 0x5228 +#define PID_5264 0x5264 #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include <linux/build_bug.h> +#include <linux/compiler_attributes.h> +#include <uapi/linux/fcntl.h> + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h index 1d264dd08625..f2394a409c9d 100644 --- a/include/linux/rwbase_rt.h +++ b/include/linux/rwbase_rt.h @@ -26,12 +26,17 @@ struct rwbase_rt { } while (0) -static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) != READER_BIAS; } -static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb) +{ + return atomic_read(&rwb->readers) == WRITER_BIAS; +} + +static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) > 0; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 1dd530ce8b45..c8b543d428b0 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -66,14 +66,24 @@ struct rw_semaphore { #endif }; -/* In all implementations count != 0 means locked */ +#define RWSEM_UNLOCKED_VALUE 0UL +#define RWSEM_WRITER_LOCKED (1UL << 0) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return atomic_long_read(&sem->count) != 0; + return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE; } -#define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); +} + +static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); +} /* Common initializer macros and functions */ @@ -152,11 +162,21 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) +static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) { return rw_base_is_locked(&sem->rwbase); } +static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rwsem_is_locked(sem)); +} + +static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); +} + static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) { return rw_base_is_contended(&sem->rwbase); @@ -169,6 +189,22 @@ static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) * the RT specific variant. */ +static inline void rwsem_assert_held(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held(sem); + else + rwsem_assert_held_nolockdep(sem); +} + +static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held_write(sem); + else + rwsem_assert_held_write_nolockdep(sem); +} + /* * lock for reading */ @@ -203,11 +239,11 @@ extern void up_read(struct rw_semaphore *sem); extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) -DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) - -DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) -DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) +DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) +DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock diff --git a/include/linux/sched.h b/include/linux/sched.h index 609bde814cb0..3c2abbc587b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -10,33 +10,41 @@ #include <uapi/linux/sched.h> #include <asm/current.h> - -#include <linux/pid.h> -#include <linux/sem.h> +#include <asm/processor.h> +#include <linux/thread_info.h> +#include <linux/preempt.h> +#include <linux/cpumask.h> + +#include <linux/cache.h> +#include <linux/irqflags_types.h> +#include <linux/smp_types.h> +#include <linux/pid_types.h> +#include <linux/sem_types.h> #include <linux/shm.h> #include <linux/kmsan_types.h> -#include <linux/mutex.h> -#include <linux/plist.h> -#include <linux/hrtimer.h> -#include <linux/irqflags.h> -#include <linux/seccomp.h> -#include <linux/nodemask.h> -#include <linux/rcupdate.h> -#include <linux/refcount.h> +#include <linux/mutex_types.h> +#include <linux/plist_types.h> +#include <linux/hrtimer_types.h> +#include <linux/timer_types.h> +#include <linux/seccomp_types.h> +#include <linux/nodemask_types.h> +#include <linux/refcount_types.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> -#include <linux/syscall_user_dispatch.h> +#include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/task_io_accounting.h> -#include <linux/posix-timers.h> -#include <linux/rseq.h> -#include <linux/seqlock.h> +#include <linux/posix-timers_types.h> +#include <linux/restart_block.h> +#include <uapi/linux/rseq.h> +#include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/livepatch_sched.h> +#include <linux/uidgid_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -63,26 +71,27 @@ struct robust_list_head; struct root_domain; struct rq; struct sched_attr; -struct sched_param; +struct sched_dl_entity; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; +struct task_struct; struct user_event_mm; /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * - * We have two separate sets of flags: task->state + * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state are * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. */ -/* Used in tsk->state: */ +/* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 @@ -92,7 +101,7 @@ struct user_event_mm; #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* Used in tsk->state again: */ +/* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 @@ -173,7 +182,7 @@ struct user_event_mm; #endif /* - * set_current_state() includes a barrier so that the write of current->state + * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * @@ -196,9 +205,9 @@ struct user_event_mm; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * where wake_up_state()/try_to_wake_up() executes a full memory barrier before - * accessing p->state. + * accessing p->__state. * - * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, + * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * @@ -370,6 +379,10 @@ extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif +struct sched_param { + int sched_priority; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -410,42 +423,6 @@ struct load_weight { u32 inv_weight; }; -/** - * struct util_est - Estimation utilization of FAIR tasks - * @enqueued: instantaneous estimated utilization of a task/cpu - * @ewma: the Exponential Weighted Moving Average (EWMA) - * utilization of a task - * - * Support data structure to track an Exponential Weighted Moving Average - * (EWMA) of a FAIR task's utilization. New samples are added to the moving - * average each time a task completes an activation. Sample's weight is chosen - * so that the EWMA will be relatively insensitive to transient changes to the - * task's workload. - * - * The enqueued attribute has a slightly different meaning for tasks and cpus: - * - task: the task's util_avg at last task dequeue time - * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU - * Thus, the util_est.enqueued of a task represents the contribution on the - * estimated utilization of the CPU where that task is currently enqueued. - * - * Only for tasks we track a moving average of the past instantaneous - * estimated utilization. This allows to absorb sporadic drops in utilization - * of an otherwise almost periodic task. - * - * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg - * updates. When a task is dequeued, its util_est should not be updated if its - * util_avg has not been updated in the meantime. - * This information is mapped into the MSB bit of util_est.enqueued at dequeue - * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg - * for a task) it is safe to use MSB. - */ -struct util_est { - unsigned int enqueued; - unsigned int ewma; -#define UTIL_EST_WEIGHT_SHIFT 2 -#define UTIL_AVG_UNCHANGED 0x80000000 -} __attribute__((__aligned__(sizeof(u64)))); - /* * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). @@ -500,9 +477,20 @@ struct sched_avg { unsigned long load_avg; unsigned long runnable_avg; unsigned long util_avg; - struct util_est util_est; + unsigned int util_est; } ____cacheline_aligned; +/* + * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg + * updates. When a task is dequeued, its util_est should not be updated if its + * util_avg has not been updated in the meantime. + * This information is mapped into the MSB bit of util_est at dequeue time. + * Since max value of util_est for a task is 1024 (PELT util_avg for a task) + * it is safe to use MSB. + */ +#define UTIL_EST_WEIGHT_SHIFT 2 +#define UTIL_AVG_UNCHANGED 0x80000000 + struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -520,7 +508,7 @@ struct sched_statistics { u64 block_max; s64 sum_block_runtime; - u64 exec_max; + s64 exec_max; u64 slice_max; u64 nr_migrations_cold; @@ -549,13 +537,18 @@ struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; + u64 deadline; + u64 min_vruntime; + struct list_head group_node; unsigned int on_rq; u64 exec_start; u64 sum_exec_runtime; - u64 vruntime; u64 prev_sum_exec_runtime; + u64 vruntime; + s64 vlag; + u64 slice; u64 nr_migrations; @@ -599,6 +592,9 @@ struct sched_rt_entity { #endif } __randomize_layout; +typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); +typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); + struct sched_dl_entity { struct rb_node rb_node; @@ -646,6 +642,7 @@ struct sched_dl_entity { unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; + unsigned int dl_server : 1; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -660,7 +657,20 @@ struct sched_dl_entity { * timer is needed to decrease the active utilization at the correct * time. */ - struct hrtimer inactive_timer; + struct hrtimer inactive_timer; + + /* + * Bits for DL-server functionality. Also see the comment near + * dl_server_update(). + * + * @rq the runqueue this server is for + * + * @server_has_tasks() returns true if @server_pick return a + * runnable task. + */ + struct rq *rq; + dl_server_has_tasks_f server_has_tasks; + dl_server_pick_f server_pick; #ifdef CONFIG_RT_MUTEXES /* @@ -745,10 +755,8 @@ struct task_struct { #endif unsigned int __state; -#ifdef CONFIG_PREEMPT_RT /* saved state for "spinlock sleepers" */ unsigned int saved_state; -#endif /* * This begins the randomizable portion of task_struct. Only @@ -789,6 +797,7 @@ struct task_struct { struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; + struct sched_dl_entity *dl_server; const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE @@ -849,6 +858,8 @@ struct task_struct { u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_exit_cpu; + struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU @@ -870,6 +881,7 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct address_space *faults_disabled_mapping; int exit_state; int exit_code; @@ -906,8 +918,11 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif - /* Bit to tell LSMs we're in execve(): */ + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK @@ -944,7 +959,7 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif -#ifdef CONFIG_IOMMU_SVA +#ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL @@ -997,7 +1012,6 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; @@ -1247,6 +1261,7 @@ struct task_struct { /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; + u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING @@ -1438,6 +1453,10 @@ struct task_struct { struct mem_cgroup *active_memcg; #endif +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *objcg; +#endif + #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif @@ -1548,114 +1567,6 @@ struct task_struct { */ }; -static inline struct pid *task_pid(struct task_struct *task) -{ - return task->thread_pid; -} - -/* - * the helpers to get the task's different pids as they are seen - * from various namespaces - * - * task_xid_nr() : global id, i.e. the id seen from the init namespace; - * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of - * current. - * task_xid_nr_ns() : id seen from the ns specified; - * - * see also pid_nr() etc in include/linux/pid.h - */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); - -static inline pid_t task_pid_nr(struct task_struct *tsk) -{ - return tsk->pid; -} - -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); -} - -static inline pid_t task_pid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); -} - - -static inline pid_t task_tgid_nr(struct task_struct *tsk) -{ - return tsk->tgid; -} - -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->thread_pid != NULL; -} - -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); -} - -static inline pid_t task_pgrp_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); -} - - -static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); -} - -static inline pid_t task_session_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); -} - -static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); -} - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); -} - -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - -/* Obsolete, do not use: */ -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return task_pgrp_nr_ns(tsk, &init_pid_ns); -} - #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -1666,7 +1577,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); - if (tsk_state == TASK_IDLE) + if ((tsk_state & TASK_IDLE) == TASK_IDLE) state = TASK_REPORT_IDLE; /* @@ -1674,7 +1585,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. */ - if (tsk_state == TASK_RTLOCK_WAIT) + if (tsk_state & TASK_RTLOCK_WAIT) state = TASK_UNINTERRUPTIBLE; return fls(state); @@ -1699,20 +1610,6 @@ static inline char task_state_to_char(struct task_struct *tsk) return task_index_to_char(task_state_index(tsk)); } -/** - * is_global_init - check if a task structure is init. Since init - * is free to have sub-threads we need to check tgid. - * @tsk: Task structure to be checked. - * - * Check if a task structure is the first user space task the kernel created. - * - * Return: 1 if the task structure is init. 0 otherwise. - */ -static inline int is_global_init(struct task_struct *tsk) -{ - return task_tgid_nr(tsk) == 1; -} - extern struct pid *cad_pid; /* @@ -1729,26 +1626,27 @@ extern struct pid *cad_pid; #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ -#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ -#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF__HOLE__00800000 0x00800000 -#define PF__HOLE__01000000 0x01000000 +#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ +#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF__HOLE__20000000 0x20000000 +#define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. + * See memalloc_pin_save() */ +#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ @@ -1853,7 +1751,17 @@ extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP + +/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); + +/** + * set_cpus_allowed_ptr - set CPU affinity mask of a task + * @p: the task + * @new_mask: CPU affinity mask + * + * Return: zero if successful, or a negative error code + */ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); @@ -1932,9 +1840,7 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { -#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK struct task_struct task; -#endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif @@ -2154,15 +2060,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) -static inline void cond_resched_rcu(void) -{ -#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) - rcu_read_unlock(); - cond_resched(); - rcu_read_lock(); -#endif -} - #ifdef CONFIG_PREEMPT_DYNAMIC extern bool preempt_model_none(void); @@ -2204,37 +2101,6 @@ static inline bool preempt_model_preemptible(void) return preempt_model_full() || preempt_model_rt(); } -/* - * Does a critical section need to be broken due to another - * task waiting?: (technically does not depend on CONFIG_PREEMPTION, - * but a general need for low latency) - */ -static inline int spin_needbreak(spinlock_t *lock) -{ -#ifdef CONFIG_PREEMPTION - return spin_is_contended(lock); -#else - return 0; -#endif -} - -/* - * Check if a rwlock is contended. - * Returns non-zero if there is another task waiting on the rwlock. - * Returns zero if the lock is not contended or the system / underlying - * rwlock implementation does not support contention detection. - * Technically does not depend on CONFIG_PREEMPTION, but a general need - * for low latency. - */ -static inline int rwlock_needbreak(rwlock_t *lock) -{ -#ifdef CONFIG_PREEMPTION - return rwlock_is_contended(lock); -#else - return 0; -#endif -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); @@ -2269,6 +2135,8 @@ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); +#include <linux/spinlock.h> + /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. @@ -2305,137 +2173,16 @@ static inline bool owner_on_cpu(struct task_struct *owner) unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ -#ifdef CONFIG_RSEQ - -/* - * Map the event mask on the user-space ABI enum rseq_cs_flags - * for direct mask checks. - */ -enum rseq_event_mask_bits { - RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, - RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, - RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, -}; - -enum rseq_event_mask { - RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), - RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), - RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), -}; - -static inline void rseq_set_notify_resume(struct task_struct *t) -{ - if (t->rseq) - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); -} - -void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); - -static inline void rseq_handle_notify_resume(struct ksignal *ksig, - struct pt_regs *regs) -{ - if (current->rseq) - __rseq_handle_notify_resume(ksig, regs); -} - -static inline void rseq_signal_deliver(struct ksignal *ksig, - struct pt_regs *regs) -{ - preempt_disable(); - __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); - preempt_enable(); - rseq_handle_notify_resume(ksig, regs); -} - -/* rseq_preempt() requires preemption to be disabled. */ -static inline void rseq_preempt(struct task_struct *t) -{ - __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); -} - -/* rseq_migrate() requires preemption to be disabled. */ -static inline void rseq_migrate(struct task_struct *t) -{ - __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); - rseq_set_notify_resume(t); -} - -/* - * If parent process has a registered restartable sequences area, the - * child inherits. Unregister rseq for a clone with CLONE_VM set. - */ -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) -{ - if (clone_flags & CLONE_VM) { - t->rseq = NULL; - t->rseq_len = 0; - t->rseq_sig = 0; - t->rseq_event_mask = 0; - } else { - t->rseq = current->rseq; - t->rseq_len = current->rseq_len; - t->rseq_sig = current->rseq_sig; - t->rseq_event_mask = current->rseq_event_mask; - } -} - -static inline void rseq_execve(struct task_struct *t) -{ - t->rseq = NULL; - t->rseq_len = 0; - t->rseq_sig = 0; - t->rseq_event_mask = 0; -} - -#else - -static inline void rseq_set_notify_resume(struct task_struct *t) -{ -} -static inline void rseq_handle_notify_resume(struct ksignal *ksig, - struct pt_regs *regs) -{ -} -static inline void rseq_signal_deliver(struct ksignal *ksig, - struct pt_regs *regs) -{ -} -static inline void rseq_preempt(struct task_struct *t) -{ -} -static inline void rseq_migrate(struct task_struct *t) -{ -} -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) -{ -} -static inline void rseq_execve(struct task_struct *t) -{ -} - -#endif - -#ifdef CONFIG_DEBUG_RSEQ - -void rseq_syscall(struct pt_regs *regs); - -#else - -static inline void rseq_syscall(struct pt_regs *regs) -{ -} - -#endif - #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, unsigned long uaddr); +extern int sched_core_idle_cpu(int cpu); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct task_struct *p) { } +static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } #endif extern void sched_set_stop_task(int cpu, struct task_struct *stop); diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..02f5090ffea2 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -85,10 +86,22 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_MDWE 28 #define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) + +#define MMF_HAS_MDWE_NO_INHERIT 29 + +#define MMF_VM_MERGE_ANY 30 +#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_VM_MERGE_ANY_MASK) + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} -#define MMF_VM_MERGE_ANY 29 #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 7c83d4d5a971..df3aca89d4f5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); #endif /* CONFIG_SMP */ + +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index fe1a46f30d24..2b461129d1fa 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_ISOLATION_H #include <linux/cpumask.h> +#include <linux/cpuset.h> #include <linux/init.h> #include <linux/tick.h> @@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type) static inline bool cpu_is_isolated(int cpu) { return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) || - !housekeeping_test_cpu(cpu, HK_TYPE_TICK); + !housekeeping_test_cpu(cpu, HK_TYPE_TICK) || + cpuset_cpu_is_isolated(cpu); } #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8d89c8c4fac1..b6543f9d78d6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | + PF_MEMALLOC_NOFS | + PF_MEMALLOC_NORECLAIM | + PF_MEMALLOC_NOWARN | + PF_MEMALLOC_PIN))) { /* - * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precedence + * Stronger flags before weaker flags: + * NORECLAIM implies NOIO, which in turn implies NOFS */ - if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NORECLAIM) + flags &= ~__GFP_DIRECT_RECLAIM; + else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + if (pflags & PF_MEMALLOC_NOWARN) + flags |= __GFP_NOWARN; + if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } @@ -307,6 +316,24 @@ static inline void might_alloc(gfp_t gfp_mask) } /** + * memalloc_flags_save - Add a PF_* flag to current->flags, save old value + * + * This allows PF_* flags to be conveniently added, irrespective of current + * value, and then the old version restored with memalloc_flags_restore(). + */ +static inline unsigned memalloc_flags_save(unsigned flags) +{ + unsigned oldflags = ~current->flags & flags; + current->flags |= flags; + return oldflags; +} + +static inline void memalloc_flags_restore(unsigned flags) +{ + current->flags &= ~flags; +} + +/** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. @@ -315,13 +342,12 @@ static inline void might_alloc(gfp_t gfp_mask) * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** @@ -334,7 +360,7 @@ static inline unsigned int memalloc_noio_save(void) */ static inline void memalloc_noio_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; + memalloc_flags_restore(flags); } /** @@ -346,13 +372,12 @@ static inline void memalloc_noio_restore(unsigned int flags) * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_nofs_restore. */ static inline unsigned int memalloc_nofs_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOFS; - current->flags |= PF_MEMALLOC_NOFS; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** @@ -365,32 +390,76 @@ static inline unsigned int memalloc_nofs_save(void) */ static inline void memalloc_nofs_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. + * + * This function marks the beginning of the __GFP_MEMALLOC allocation scope. + * All further allocations will implicitly add the __GFP_MEMALLOC flag, which + * prevents entering reclaim and allows access to all memory reserves. This + * should only be used when the caller guarantees the allocation will allow more + * memory to be freed very shortly, i.e. it needs to allocate some memory in + * the process of freeing memory, and cannot reclaim due to potential recursion. + * + * Users of this scope have to be extremely careful to not deplete the reserves + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. Usage of a + * pre-allocated pool (e.g. mempool) should be always considered before using + * this scope. + * + * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC + * + * Context: This function should not be used in an interrupt context as that one + * does not give PF_MEMALLOC access to reserves. + * See __gfp_pfmemalloc_flags(). + * Return: The saved flags to be passed to memalloc_noreclaim_restore. + */ static inline unsigned int memalloc_noreclaim_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - return flags; + return memalloc_flags_save(PF_MEMALLOC); } +/** + * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. + * @flags: Flags to restore. + * + * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save + * function. Always make sure that the given flags is the return value from the + * pairing memalloc_noreclaim_save call. + */ static inline void memalloc_noreclaim_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. + * + * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. + * All further allocations will implicitly remove the __GFP_MOVABLE flag, which + * will constraint the allocations to zones that allow long term pinning, i.e. + * not ZONE_MOVABLE zones. + * + * Return: The saved flags to be passed to memalloc_pin_restore. + */ static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_PIN; - - current->flags |= PF_MEMALLOC_PIN; - return flags; + return memalloc_flags_save(PF_MEMALLOC_PIN); } +/** + * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. + * @flags: Flags to restore. + * + * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function. + * Always make sure that the given flags is the return value from the pairing + * memalloc_pin_save call. + */ static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; + memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG @@ -403,6 +472,10 @@ DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * + * Please, make sure that caller has a reference to the passed memcg structure, + * so its lifetime is guaranteed to exceed the scope between two + * set_active_memcg() calls. + * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..52b22c5c396d 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,13 +15,23 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p, bool final); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -38,7 +48,7 @@ static inline void task_numa_free(struct task_struct *p, bool final) { } static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) + struct folio *folio, int src_nid, int dst_cpu) { return true; } diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index fad77b5172e2..b04a5d04dee9 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -110,13 +110,20 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* - * Domain members share CPU package resources (i.e. caches) + * Domain members share CPU cluster (LLC tags or L2 cache) + * + * NEEDS_GROUPS: Clusters are shared between groups. + */ +SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) + +/* + * Domain members share CPU Last Level Caches * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share * the same cache(s). * NEEDS_GROUPS: Caches are shared between groups. */ -SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Only a single load balancing instance diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 669e8cff40c7..0a0e23c45406 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -9,6 +9,7 @@ #include <linux/sched/task.h> #include <linux/cred.h> #include <linux/refcount.h> +#include <linux/pid.h> #include <linux/posix-timers.h> #include <linux/mm_types.h> #include <asm/ptrace.h> @@ -303,20 +304,11 @@ static inline void kernel_signal_stop(void) schedule(); } -#ifdef __ia64__ -# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 -#else -# define ___ARCH_SI_IA64(_a1, _a2, _a3) -#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr); +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); @@ -441,7 +433,6 @@ static inline bool fault_signal_pending(vm_fault_t fault_flags, * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. */ -extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); extern void calculate_sigpending(void); @@ -649,17 +640,18 @@ extern void flush_itimer_signals(void); extern bool current_is_single_threaded(void); /* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. + * Without tasklist/siglock it is only rcu-safe if g can't exit/exec, + * otherwise next_thread(t) will never reach g after list_del_rcu(g). */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define for_other_threads(p, t) \ + for (t = p; (t = next_thread(t)) != p; ) + #define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ + lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) @@ -718,22 +710,31 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } -static inline struct task_struct *next_thread(const struct task_struct *p) +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) +{ + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + +static inline struct task_struct *next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern bool thread_group_exited(struct pid *pid); - extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 59d3736c454c..fb1e295e7e63 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; } void arch_smt_update(void); -#endif +#endif /* _LINUX_SCHED_SMT_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index dd35ce28bb90..d362aacf9f89 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -7,6 +7,8 @@ * functionality: */ +#include <linux/rcupdate.h> +#include <linux/refcount.h> #include <linux/sched.h> #include <linux/uaccess.h> @@ -118,11 +120,47 @@ static inline struct task_struct *get_task_struct(struct task_struct *t) } extern void __put_task_struct(struct task_struct *t); +extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); static inline void put_task_struct(struct task_struct *t) { - if (refcount_dec_and_test(&t->usage)) + if (!refcount_dec_and_test(&t->usage)) + return; + + /* + * In !RT, it is always safe to call __put_task_struct(). + * Under RT, we can only call it in preemptible context. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { + static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP); + + lock_map_acquire_try(&put_task_map); __put_task_struct(t); + lock_map_release(&put_task_map); + return; + } + + /* + * under PREEMPT_RT, we can't call put_task_struct + * in atomic context because it will indirectly + * acquire sleeping locks. + * + * call_rcu() will schedule delayed_put_task_struct_rcu() + * to be called in process context. + * + * __put_task_struct() is called when + * refcount_dec_and_test(&t->usage) succeeds. + * + * This means that it can't "conflict" with + * put_task_struct_rcu_user() which abuses ->rcu the same + * way; rcu_users has a reference so task->usage can't be + * zero after rcu_users 1 -> 0 transition. + * + * delayed_free_task() also uses ->rcu, but it is only called + * when it fails to fork a process. Therefore, there is no + * way it can conflict with put_task_struct(). + */ + call_rcu(&t->rcu, __put_task_struct_rcu_cb); } DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) @@ -190,4 +228,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index f158b025c175..ccd72b978e1f 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/magic.h> +#include <linux/refcount.h> #ifdef CONFIG_THREAD_INFO_IN_TASK diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 67b573d5bf28..18572c9ea724 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[]; #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_SHARE_LLC; } #endif @@ -109,8 +109,6 @@ struct sched_domain { u64 max_newidle_lb_cost; unsigned long last_decay_max_lb_cost; - u64 avg_scan_cost; /* select_idle_sibling */ - #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -178,7 +176,9 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); +bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); +bool cpus_share_resources(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); @@ -227,11 +227,21 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { } +static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) +{ + return true; +} + static inline bool cpus_share_cache(int this_cpu, int that_cpu) { return true; } +static inline bool cpus_share_resources(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) @@ -275,6 +285,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus, { } #endif +#ifndef arch_scale_freq_ref +static __always_inline +unsigned int arch_scale_freq_ref(int cpu) +{ + return 0; +} +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 3c3e049224ae..969aaf5ef9d6 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -20,4 +20,4 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -#endif +#endif /* _LINUX_SCHED_TYPES_H */ diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 837a23624a66..bc60243d43b3 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_VHOST_TASK_H -#define _LINUX_VHOST_TASK_H - +#ifndef _LINUX_SCHED_VHOST_TASK_H +#define _LINUX_SCHED_VHOST_TASK_H struct vhost_task; @@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); void vhost_task_wake(struct vhost_task *vtsk); -#endif +#endif /* _LINUX_SCHED_VHOST_TASK_H */ diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..b807141acc14 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -47,6 +47,10 @@ struct scmi_clock_info { bool rate_discrete; bool rate_changed_notifications; bool rate_change_requested_notifications; + bool state_ctrl_forbidden; + bool rate_ctrl_forbidden; + bool parent_ctrl_forbidden; + bool extended_config; union { struct { int num_rates; @@ -58,6 +62,8 @@ struct scmi_clock_info { u64 step_size; } range; }; + int num_parents; + u32 *parents; }; enum scmi_power_scale { @@ -70,6 +76,13 @@ struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; +enum scmi_clock_oem_config { + SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1, + SCMI_CLOCK_CFG_PHASE, + SCMI_CLOCK_CFG_OEM_START = 0x80, + SCMI_CLOCK_CFG_OEM_END = 0xFF, +}; + /** * struct scmi_clk_proto_ops - represents the various operations provided * by SCMI Clock Protocol @@ -80,6 +93,11 @@ struct scmi_protocol_handle; * @rate_set: set the clock rate of a clock * @enable: enables the specified clock * @disable: disables the specified clock + * @state_get: get the status of the specified clock + * @config_oem_get: get the value of an OEM specific clock config + * @config_oem_set: set the value of an OEM specific clock config + * @parent_get: get the parent id of a clk + * @parent_set: set the parent of a clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -90,23 +108,40 @@ struct scmi_clk_proto_ops { u64 *rate); int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 rate); - int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable_atomic)(const struct scmi_protocol_handle *ph, - u32 clk_id); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool *enabled, bool atomic); + int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + enum scmi_clock_oem_config oem_type, + u32 *oem_val, u32 *attributes, bool atomic); + int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + enum scmi_clock_oem_config oem_type, + u32 oem_val, bool atomic); + int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); + int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); +}; + +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; }; /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain - * @device_domain_id: gets the scmi domain id for a given device * @transition_latency_get: gets the DVFS transition latency for a given device + * @rate_limit_get: gets the minimum time (us) required between successive + * requests * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency * to sustained performance level mapping @@ -116,10 +151,15 @@ struct scmi_clk_proto_ops { * at a given frequency * @fast_switch_possible: indicates if fast DVFS switching is possible or not * for a given device + * @fast_switch_rate_limit: gets the minimum time (us) required between + * successive fast_switching requests * @power_scale_mw_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -128,11 +168,12 @@ struct scmi_perf_proto_ops { u32 level, bool poll); int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); + int (*rate_limit_get)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); int (*device_opps_add)(const struct scmi_protocol_handle *ph, - struct device *dev); + struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long rate, bool poll); int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -140,7 +181,9 @@ struct scmi_perf_proto_ops { int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); + int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; @@ -930,6 +973,8 @@ struct scmi_perf_limits_report { unsigned int domain_id; unsigned int range_max; unsigned int range_min; + unsigned long range_max_freq; + unsigned long range_min_freq; }; struct scmi_perf_level_report { @@ -937,6 +982,7 @@ struct scmi_perf_level_report { unsigned int agent_id; unsigned int domain_id; unsigned int performance_level; + unsigned long performance_level_freq; }; struct scmi_sensor_trip_point_report { diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index eab7081392d5..75303c126285 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -4,6 +4,132 @@ #include <uapi/linux/screen_info.h> +#include <linux/bits.h> + +/** + * SCREEN_INFO_MAX_RESOURCES - maximum number of resources per screen_info + */ +#define SCREEN_INFO_MAX_RESOURCES 3 + +struct pci_dev; +struct resource; + +static inline bool __screen_info_has_lfb(unsigned int type) +{ + return (type == VIDEO_TYPE_VLFB) || (type == VIDEO_TYPE_EFI); +} + +static inline u64 __screen_info_lfb_base(const struct screen_info *si) +{ + u64 lfb_base = si->lfb_base; + + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + lfb_base |= (u64)si->ext_lfb_base << 32; + + return lfb_base; +} + +static inline void __screen_info_set_lfb_base(struct screen_info *si, u64 lfb_base) +{ + si->lfb_base = lfb_base & GENMASK_ULL(31, 0); + si->ext_lfb_base = (lfb_base & GENMASK_ULL(63, 32)) >> 32; + + if (si->ext_lfb_base) + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + else + si->capabilities &= ~VIDEO_CAPABILITY_64BIT_BASE; +} + +static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned int type) +{ + u64 lfb_size = si->lfb_size; + + if (type == VIDEO_TYPE_VLFB) + lfb_size <<= 16; + return lfb_size; +} + +static inline unsigned int __screen_info_video_type(unsigned int type) +{ + switch (type) { + case VIDEO_TYPE_MDA: + case VIDEO_TYPE_CGA: + case VIDEO_TYPE_EGAM: + case VIDEO_TYPE_EGAC: + case VIDEO_TYPE_VGAC: + case VIDEO_TYPE_VLFB: + case VIDEO_TYPE_PICA_S3: + case VIDEO_TYPE_MIPS_G364: + case VIDEO_TYPE_SGI: + case VIDEO_TYPE_TGAC: + case VIDEO_TYPE_SUN: + case VIDEO_TYPE_SUNPCI: + case VIDEO_TYPE_PMAC: + case VIDEO_TYPE_EFI: + return type; + default: + return 0; + } +} + +/** + * screen_info_video_type() - Decodes the video type from struct screen_info + * @si: an instance of struct screen_info + * + * Returns: + * A VIDEO_TYPE_ constant representing si's type of video display, or 0 otherwise. + */ +static inline unsigned int screen_info_video_type(const struct screen_info *si) +{ + unsigned int type; + + // check if display output is on + if (!si->orig_video_isVGA) + return 0; + + // check for a known VIDEO_TYPE_ constant + type = __screen_info_video_type(si->orig_video_isVGA); + if (type) + return si->orig_video_isVGA; + + // check if text mode has been initialized + if (!si->orig_video_lines || !si->orig_video_cols) + return 0; + + // 80x25 text, mono + if (si->orig_video_mode == 0x07) { + if ((si->orig_video_ega_bx & 0xff) != 0x10) + return VIDEO_TYPE_EGAM; + else + return VIDEO_TYPE_MDA; + } + + // EGA/VGA, 16 colors + if ((si->orig_video_ega_bx & 0xff) != 0x10) { + if (si->orig_video_isVGA) + return VIDEO_TYPE_VGAC; + else + return VIDEO_TYPE_EGAC; + } + + // the rest... + return VIDEO_TYPE_CGA; +} + +ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); + +#if defined(CONFIG_PCI) +void screen_info_apply_fixups(void); +struct pci_dev *screen_info_pci_dev(const struct screen_info *si); +#else +static inline void screen_info_apply_fixups(void) +{ } +static inline struct pci_dev *screen_info_pci_dev(const struct screen_info *si) +{ + return NULL; +} +#endif + extern struct screen_info screen_info; #endif /* _SCREEN_INFO_H */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 175079552f68..709ad84809e1 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,7 @@ #define _LINUX_SECCOMP_H #include <uapi/linux/seccomp.h> +#include <linux/seccomp_types.h> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ @@ -21,25 +22,6 @@ #include <linux/atomic.h> #include <asm/seccomp.h> -struct seccomp_filter; -/** - * struct seccomp - the state of a seccomp'ed process - * - * @mode: indicates one of the valid values above for controlled - * system calls available to a process. - * @filter_count: number of seccomp filters - * @filter: must always point to a valid seccomp-filter or NULL as it is - * accessed without locking during system call entry. - * - * @filter must only be accessed from the context of current as there - * is no read locking. - */ -struct seccomp { - int mode; - atomic_t filter_count; - struct seccomp_filter *filter; -}; - #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) @@ -64,8 +46,6 @@ static inline int seccomp_mode(struct seccomp *s) #include <linux/errno.h> -struct seccomp { }; -struct seccomp_filter { }; struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER @@ -126,6 +106,8 @@ static inline long seccomp_get_metadata(struct task_struct *task, #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; +struct pid_namespace; +struct pid; int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); diff --git a/include/linux/seccomp_types.h b/include/linux/seccomp_types.h new file mode 100644 index 000000000000..cf0a0355024f --- /dev/null +++ b/include/linux/seccomp_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SECCOMP_TYPES_H +#define _LINUX_SECCOMP_TYPES_H + +#include <linux/types.h> + +#ifdef CONFIG_SECCOMP + +struct seccomp_filter; +/** + * struct seccomp - the state of a seccomp'ed process + * + * @mode: indicates one of the valid values above for controlled + * system calls available to a process. + * @filter_count: number of seccomp filters + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. + * + * @filter must only be accessed from the context of current as there + * is no read locking. + */ +struct seccomp { + int mode; + atomic_t filter_count; + struct seccomp_filter *filter; +}; + +#else + +struct seccomp { }; +struct seccomp_filter { }; + +#endif + +#endif /* _LINUX_SECCOMP_TYPES_H */ diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 988528b5da43..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -6,24 +6,23 @@ extern const struct address_space_operations secretmem_aops; -static inline bool page_is_secretmem(struct page *page) +static inline bool folio_is_secretmem(struct folio *folio) { struct address_space *mapping; /* - * Using page_mapping() is quite slow because of the actual call - * instruction and repeated compound_head(page) inside the - * page_mapping() function. - * We know that secretmem pages are not compound and LRU so we can + * Using folio_mapping() is quite slow because of the actual call + * instruction. + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (PageCompound(page) || !PageLRU(page)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) - ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); + ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS); - if (!mapping || mapping != page->mapping) + if (!mapping || mapping != folio->mapping) return false; return mapping->a_ops == &secretmem_aops; @@ -39,7 +38,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma) return false; } -static inline bool page_is_secretmem(struct page *page) +static inline bool folio_is_secretmem(struct folio *folio) { return false; } diff --git a/include/linux/security.h b/include/linux/security.h index 32828502f09e..41a8f667bdfa 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,8 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/bpf.h> +#include <uapi/linux/lsm.h> struct linux_binprm; struct cred; @@ -60,6 +62,7 @@ struct fs_parameter; enum fs_value_type; struct watch; struct watch_notification; +struct lsm_ctx; /* Default (no) options for the capable function */ #define CAP_OPT_NONE 0x0 @@ -138,6 +141,8 @@ enum lockdown_reason { }; extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; +extern u32 lsm_active_cnt; +extern const struct lsm_id *lsm_idlist[]; /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, @@ -145,12 +150,13 @@ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); -extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capget(const struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int cap_inode_removexattr(struct mnt_idmap *idmap, @@ -260,6 +266,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb); /* prototypes */ extern int security_init(void); extern int early_security_init(void); +extern u64 lsm_name_to_attr(const char *name); /* Security operations */ int security_binder_set_context_mgr(const struct cred *mgr); @@ -268,10 +275,10 @@ int security_binder_transaction(const struct cred *from, int security_binder_transfer_binder(const struct cred *from, const struct cred *to); int security_binder_transfer_file(const struct cred *from, - const struct cred *to, struct file *file); + const struct cred *to, const struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); -int security_capget(struct task_struct *target, +int security_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -283,16 +290,17 @@ int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts); -int security_quotactl(int cmds, int type, int id, struct super_block *sb); +int security_quotactl(int cmds, int type, int id, const struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); -void security_bprm_committing_creds(struct linux_binprm *bprm); -void security_bprm_committed_creds(struct linux_binprm *bprm); +void security_bprm_committing_creds(const struct linux_binprm *bprm); +void security_bprm_committed_creds(const struct linux_binprm *bprm); +int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); int security_sb_alloc(struct super_block *sb); @@ -302,7 +310,7 @@ void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); -int security_sb_kern_mount(struct super_block *sb); +int security_sb_kern_mount(const struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, @@ -337,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode, const struct qstr *name, const struct inode *context_inode); int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode); +void security_inode_post_create_tmpfile(struct mnt_idmap *idmap, + struct inode *inode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int security_inode_unlink(struct inode *dir, struct dentry *dentry); @@ -354,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, @@ -361,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap, int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); +void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); +void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); +void security_inode_post_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -385,8 +403,11 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); +void security_file_release(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); int security_mmap_addr(unsigned long addr); @@ -399,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); @@ -468,10 +490,13 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, const char *lsm, const char *name, +int security_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 __user *size, u32 flags); +int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 size, u32 flags); +int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); -int security_setprocattr(const char *lsm, const char *name, void *value, - size_t size); +int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -482,6 +507,8 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, + void *val, size_t val_len, u64 id, u64 flags); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -499,6 +526,11 @@ static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb) return 0; } +static inline u64 lsm_name_to_attr(const char *name) +{ + return LSM_ATTR_UNDEF; +} + static inline void security_free_mnt_opts(void **mnt_opts) { } @@ -537,7 +569,7 @@ static inline int security_binder_transfer_binder(const struct cred *from, static inline int security_binder_transfer_file(const struct cred *from, const struct cred *to, - struct file *file) + const struct file *file) { return 0; } @@ -553,7 +585,7 @@ static inline int security_ptrace_traceme(struct task_struct *parent) return cap_ptrace_traceme(parent); } -static inline int security_capget(struct task_struct *target, +static inline int security_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) @@ -579,7 +611,7 @@ static inline int security_capable(const struct cred *cred, } static inline int security_quotactl(int cmds, int type, int id, - struct super_block *sb) + const struct super_block *sb) { return 0; } @@ -611,7 +643,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) } static inline int security_bprm_creds_from_file(struct linux_binprm *bprm, - struct file *file) + const struct file *file) { return cap_bprm_creds_from_file(bprm, file); } @@ -621,14 +653,19 @@ static inline int security_bprm_check(struct linux_binprm *bprm) return 0; } -static inline void security_bprm_committing_creds(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(const struct linux_binprm *bprm) { } -static inline void security_bprm_committed_creds(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(const struct linux_binprm *bprm) { } +static inline int security_fs_context_submount(struct fs_context *fc, + struct super_block *reference) +{ + return 0; +} static inline int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { @@ -782,6 +819,10 @@ static inline int security_inode_create(struct inode *dir, return 0; } +static inline void +security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) +{ } + static inline int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -855,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap, return 0; } +static inline void +security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid) +{ } + static inline int security_inode_getattr(const struct path *path) { return 0; @@ -875,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ } + static inline int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) @@ -889,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name) +{ } + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -911,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap, return cap_inode_removexattr(idmap, dentry, name); } +static inline void security_inode_post_removexattr(struct dentry *dentry, + const char *name) +{ } + static inline int security_inode_need_killpriv(struct dentry *dentry) { return cap_inode_need_killpriv(dentry); @@ -971,6 +1031,9 @@ static inline int security_file_alloc(struct file *file) return 0; } +static inline void security_file_release(struct file *file) +{ } + static inline void security_file_free(struct file *file) { } @@ -980,6 +1043,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } +static inline int security_file_ioctl_compat(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { @@ -1031,6 +1101,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_post_open(struct file *file, int mask) +{ + return 0; +} + static inline int security_file_truncate(struct file *file) { return 0; @@ -1330,14 +1405,28 @@ static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode) { } -static inline int security_getprocattr(struct task_struct *p, const char *lsm, +static inline int security_getselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t __user *size, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int security_setselfattr(unsigned int attr, + struct lsm_ctx __user *ctx, + size_t size, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { return -EINVAL; } -static inline int security_setprocattr(const char *lsm, char *name, - void *value, size_t size) +static inline int security_setprocattr(int lsmid, char *name, void *value, + size_t size) { return -EINVAL; } @@ -1388,6 +1477,12 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } +static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, + u32 *uctx_len, void *val, size_t val_len, + u64 id, u64 flags) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -1439,7 +1534,8 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_sk_classify_flow(const struct sock *sk, + struct flowi_common *flic); void security_req_classify_flow(const struct request_sock *req, struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); @@ -1597,7 +1693,7 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, +static inline void security_sk_classify_flow(const struct sock *sk, struct flowi_common *flic) { } @@ -1819,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); +void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry); int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); @@ -1853,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den return 0; } +static inline void security_path_post_mknod(struct mnt_idmap *idmap, + struct dentry *dentry) +{ } + static inline int security_path_truncate(const struct path *path) { return 0; @@ -1904,6 +2005,9 @@ void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); +void security_key_post_create_or_update(struct key *keyring, struct key *key, + const void *payload, size_t payload_len, + unsigned long flags, bool create); #else @@ -1931,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) return 0; } +static inline void security_key_post_create_or_update(struct key *keyring, + struct key *key, + const void *payload, + size_t payload_len, + unsigned long flags, + bool create) +{ } + #endif #endif /* CONFIG_KEYS */ @@ -2012,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_prog_aux; +struct bpf_token; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_alloc(struct bpf_map *map); +extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); -extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token); +extern void security_bpf_prog_free(struct bpf_prog *prog); +extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path); +extern void security_bpf_token_free(struct bpf_token *token); +extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2038,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_alloc(struct bpf_map *map) +static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) { return 0; } @@ -2046,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map) static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +static inline void security_bpf_prog_free(struct bpf_prog *prog) { } + +static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path) +{ + return 0; +} + +static inline void security_bpf_token_free(struct bpf_token *token) +{ } + +static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + return 0; +} + +static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/sed-opal-key.h b/include/linux/sed-opal-key.h new file mode 100644 index 000000000000..0ca03054e8f6 --- /dev/null +++ b/include/linux/sed-opal-key.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SED key operations. + * + * Copyright (C) 2023 IBM Corporation + * + * These are the accessor functions (read/write) for SED Opal + * keys. Specific keystores can provide overrides. + * + */ + +#include <linux/kernel.h> + +#ifdef CONFIG_PSERIES_PLPKS_SED +int sed_read_key(char *keyname, char *key, u_int *keylen); +int sed_write_key(char *keyname, char *key, u_int keylen); +#else +static inline +int sed_read_key(char *keyname, char *key, u_int *keylen) { + return -EOPNOTSUPP; +} +static inline +int sed_write_key(char *keyname, char *key, u_int keylen) { + return -EOPNOTSUPP; +} +#endif diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index bbae1e52ab4f..2ac50822554e 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -25,6 +25,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); +#define OPAL_AUTH_KEY "opal-boot-pin" +#define OPAL_AUTH_KEY_PREV "opal-boot-pin-prev" + static inline bool is_sed_ioctl(unsigned int cmd) { switch (cmd) { @@ -47,6 +50,8 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_STATUS: case IOC_OPAL_GET_LR_STATUS: case IOC_OPAL_GET_GEOMETRY: + case IOC_OPAL_DISCOVERY: + case IOC_OPAL_REVERT_LSP: return true; } return false; diff --git a/include/linux/selection.h b/include/linux/selection.h index 170ef28ff26b..bab7d30d3446 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -14,17 +14,16 @@ struct tty_struct; struct vc_data; -extern void clear_selection(void); -extern int set_selection_user(const struct tiocl_selection __user *sel, - struct tty_struct *tty); -extern int set_selection_kernel(struct tiocl_selection *v, - struct tty_struct *tty); -extern int paste_selection(struct tty_struct *tty); -extern int sel_loadlut(char __user *p); -extern int mouse_reporting(void); -extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); - -bool vc_is_sel(struct vc_data *vc); +void clear_selection(void); +int set_selection_user(const struct tiocl_selection __user *sel, + struct tty_struct *tty); +int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty); +int paste_selection(struct tty_struct *tty); +int sel_loadlut(u32 __user *lut); +int mouse_reporting(void); +void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry); + +bool vc_is_sel(const struct vc_data *vc); extern int console_blanked; @@ -33,24 +32,21 @@ extern unsigned char default_red[]; extern unsigned char default_grn[]; extern unsigned char default_blu[]; -extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset, - bool viewed); -extern u16 screen_glyph(const struct vc_data *vc, int offset); -extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); -extern void complement_pos(struct vc_data *vc, int offset); -extern void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); - -extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); -extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); - -extern u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); -extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); -extern void vcs_scr_updated(struct vc_data *vc); - -extern int vc_uniscr_check(struct vc_data *vc); -extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, - bool viewed, - unsigned int row, unsigned int col, - unsigned int nr); +unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed); +u16 screen_glyph(const struct vc_data *vc, int offset); +u32 screen_glyph_unicode(const struct vc_data *vc, int offset); +void complement_pos(struct vc_data *vc, int offset); +void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); + +void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); +void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); + +u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); +void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); +void vcs_scr_updated(struct vc_data *vc); + +int vc_uniscr_check(struct vc_data *vc); +void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed, + unsigned int row, unsigned int col, unsigned int nr); #endif diff --git a/include/linux/sem.h b/include/linux/sem.h index 5608a500c43e..c4deefe42aeb 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -3,25 +3,17 @@ #define _LINUX_SEM_H #include <uapi/linux/sem.h> +#include <linux/sem_types.h> struct task_struct; -struct sem_undo_list; #ifdef CONFIG_SYSVIPC -struct sysv_sem { - struct sem_undo_list *undo_list; -}; - extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); extern void exit_sem(struct task_struct *tsk); #else -struct sysv_sem { - /* empty */ -}; - static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) { return 0; diff --git a/include/linux/sem_types.h b/include/linux/sem_types.h new file mode 100644 index 000000000000..73df1971a7ae --- /dev/null +++ b/include/linux/sem_types.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SEM_TYPES_H +#define _LINUX_SEM_TYPES_H + +struct sem_undo_list; + +struct sysv_sem { +#ifdef CONFIG_SYSVIPC + struct sem_undo_list *undo_list; +#endif +}; + +#endif /* _LINUX_SEM_TYPES_H */ diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 515d7fcb9634..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include <linux/fs.h> +#include <linux/bug.h> +#include <linux/minmax.h> +#include <linux/seq_file.h> +#include <linux/types.h> /* * Trace sequences are used to allow a function to call several other functions @@ -10,23 +13,28 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer - * @readpos: The next position to read in the buffer. */ struct seq_buf { char *buffer; size_t size; size_t len; - loff_t readpos; }; +#define DECLARE_SEQ_BUF(NAME, SIZE) \ + struct seq_buf NAME = { \ + .buffer = (char[SIZE]) { 0 }, \ + .size = SIZE, \ + } + static inline void seq_buf_clear(struct seq_buf *s) { s->len = 0; - s->readpos = 0; + if (s->size) + s->buffer[0] = '\0'; } static inline void @@ -39,7 +47,7 @@ seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) /* * seq_buf have a buffer that might overflow. When this happens - * the len and size are set to be equal. + * len is set to be greater than size. */ static inline bool seq_buf_has_overflowed(struct seq_buf *s) @@ -72,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_terminate - Make sure buffer is nul terminated - * @s: the seq_buf descriptor to terminate. + * seq_buf_str - get NUL-terminated C string from seq_buf + * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -84,16 +92,20 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * * After this function is called, s->buffer is safe to use * in string operations. + * + * Returns: @s->buf after making sure it is terminated. */ -static inline void seq_buf_terminate(struct seq_buf *s) +static inline const char *seq_buf_str(struct seq_buf *s) { if (WARN_ON(s->size == 0)) - return; + return ""; if (seq_buf_buffer_left(s)) s->buffer[s->len] = 0; else s->buffer[s->size - 1] = 0; + + return s->buffer; } /** @@ -101,7 +113,7 @@ static inline void seq_buf_terminate(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -123,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) @@ -143,7 +155,7 @@ extern __printf(2, 0) int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args); extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s); extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, - int cnt); + size_t start, int cnt); extern int seq_buf_puts(struct seq_buf *s, const char *str); extern int seq_buf_putc(struct seq_buf *s, unsigned char c); extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index bd023dd38ae6..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ @@ -249,18 +264,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name, /** * seq_show_option_n - display mount options with appropriate escapes - * where @value must be a specific length. + * where @value must be a specific length (i.e. + * not NUL-terminated). * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL - * @length: the length of @value to display + * @length: the exact length of @value to display, must be constant expression * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ - strncpy(val_buf, value, length); \ + memcpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..d90d8ee29d81 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -18,6 +18,7 @@ #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/preempt.h> +#include <linux/seqlock_types.h> #include <linux/spinlock.h> #include <asm/processor.h> @@ -37,37 +38,6 @@ */ #define KCSAN_SEQLOCK_REGION_MAX 1000 -/* - * Sequence counters (seqcount_t) - * - * This is the raw counting mechanism, without any writer protection. - * - * Write side critical sections must be serialized and non-preemptible. - * - * If readers can be invoked from hardirq or softirq contexts, - * interrupts or bottom halves must also be respectively disabled before - * entering the write section. - * - * This mechanism can't be used if the protected data contains pointers, - * as the writer can invalidate a pointer that a reader is following. - * - * If the write serialization mechanism is one of the common kernel - * locking primitives, use a sequence counter with associated lock - * (seqcount_LOCKNAME_t) instead. - * - * If it's desired to automatically handle the sequence counter writer - * serialization and non-preemptibility requirements, use a sequential - * lock (seqlock_t) instead. - * - * See Documentation/locking/seqlock.rst - */ -typedef struct seqcount { - unsigned sequence; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} seqcount_t; - static inline void __seqcount_init(seqcount_t *s, const char *name, struct lock_class_key *key) { @@ -132,28 +102,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ /* - * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot - * disable preemption. It can lead to higher latencies, and the write side - * sections will not be able to acquire locks which become sleeping locks - * (e.g. spinlock_t). - * - * To remain preemptible while avoiding a possible livelock caused by the - * reader preempting the writer, use a different technique: let the reader - * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the - * case, acquire then release the associated LOCKNAME writer serialization - * lock. This will allow any possibly-preempted writer to make progress - * until the end of its writer serialization lock critical section. - * - * This lock-unlock technique must be implemented for all of PREEMPT_RT - * sleeping locks. See Documentation/locking/locktypes.rst - */ -#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) -#define __SEQ_LOCK(expr) expr -#else -#define __SEQ_LOCK(expr) -#endif - -/* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock @@ -191,22 +139,21 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ -typedef struct seqcount_##lockname { \ - seqcount_t seqcount; \ - __SEQ_LOCK(locktype *lock); \ -} seqcount_##lockname##_t; \ - \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ +} \ + \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ @@ -216,7 +163,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +189,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -254,6 +201,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) return s; } +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) +{ + return s; +} + static inline unsigned __seqprop_sequence(const seqcount_t *s) { return READ_ONCE(s->sequence); @@ -271,10 +223,11 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) +#undef SEQCOUNT_LOCKNAME /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t @@ -294,19 +247,20 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -355,7 +309,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -421,7 +375,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -441,7 +395,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -512,8 +466,8 @@ do { \ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + do_raw_write_seqcount_begin(s); } /** @@ -574,7 +528,7 @@ static inline void do_write_seqcount_end(seqcount_t *s) * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because - * neither writes before and after the barrier are enclosed in a seq-writer + * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; @@ -784,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -/* - * Sequential locks (seqlock_t) - * - * Sequence counters with an embedded spinlock for writer serialization - * and non-preemptibility. - * - * For more info, see: - * - Comments on top of seqcount_t - * - Documentation/locking/seqlock.rst - */ -typedef struct { - /* - * Make sure that readers don't starve writers on PREEMPT_RT: use - * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). - */ - seqcount_spinlock_t seqcount; - spinlock_t lock; -} seqlock_t; - #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ @@ -864,7 +799,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h new file mode 100644 index 000000000000..dfdf43e3fa3d --- /dev/null +++ b/include/linux/seqlock_types.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_SEQLOCK_TYPES_H +#define __LINUX_SEQLOCK_TYPES_H + +#include <linux/lockdep_types.h> +#include <linux/mutex_types.h> +#include <linux/spinlock_types.h> + +/* + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. + * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If the write serialization mechanism is one of the common kernel + * locking primitives, use a sequence counter with associated lock + * (seqcount_LOCKNAME_t) instead. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst + */ +typedef struct seqcount { + unsigned sequence; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} seqcount_t; + +/* + * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot + * disable preemption. It can lead to higher latencies, and the write side + * sections will not be able to acquire locks which become sleeping locks + * (e.g. spinlock_t). + * + * To remain preemptible while avoiding a possible livelock caused by the + * reader preempting the writer, use a different technique: let the reader + * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the + * case, acquire then release the associated LOCKNAME writer serialization + * lock. This will allow any possibly-preempted writer to make progress + * until the end of its writer serialization lock critical section. + * + * This lock-unlock technique must be implemented for all of PREEMPT_RT + * sleeping locks. See Documentation/locking/locktypes.rst + */ +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) +#define __SEQ_LOCK(expr) expr +#else +#define __SEQ_LOCK(expr) +#endif + +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ +typedef struct seqcount_##lockname { \ + seqcount_t seqcount; \ + __SEQ_LOCK(locktype *lock); \ +} seqcount_##lockname##_t; + +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) +#undef SEQCOUNT_LOCKNAME + +/* + * Sequential locks (seqlock_t) + * + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst + */ +typedef struct { + /* + * Make sure that readers don't starve writers on PREEMPT_RT: use + * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). + */ + seqcount_spinlock_t seqcount; + spinlock_t lock; +} seqlock_t; + +#endif /* __LINUX_SEQLOCK_TYPES_H */ diff --git a/include/linux/serdev.h b/include/linux/serdev.h index f5f97fa25e8a..ff78efc1f60d 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -27,7 +27,7 @@ struct serdev_device; * not sleep. */ struct serdev_device_ops { - int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t); + size_t (*receive_buf)(struct serdev_device *, const u8 *, size_t); void (*write_wakeup)(struct serdev_device *); }; @@ -82,7 +82,7 @@ enum serdev_parity { * serdev controller structures */ struct serdev_controller_ops { - int (*write_buf)(struct serdev_controller *, const unsigned char *, size_t); + ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); @@ -99,12 +99,14 @@ struct serdev_controller_ops { /** * struct serdev_controller - interface to the serdev controller * @dev: Driver model representation of the device. + * @host: Serial port hardware controller device * @nr: number identifier for this controller/bus. * @serdev: Pointer to slave device for this controller. * @ops: Controller operations. */ struct serdev_controller { struct device dev; + struct device *host; unsigned int nr; struct serdev_device *serdev; const struct serdev_controller_ops *ops; @@ -167,7 +169,9 @@ struct serdev_device *serdev_device_alloc(struct serdev_controller *); int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); -struct serdev_controller *serdev_controller_alloc(struct device *, size_t); +struct serdev_controller *serdev_controller_alloc(struct device *host, + struct device *parent, + size_t size); int serdev_controller_add(struct serdev_controller *); void serdev_controller_remove(struct serdev_controller *); @@ -181,9 +185,9 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl serdev->ops->write_wakeup(serdev); } -static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, - const unsigned char *data, - size_t count) +static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, + const u8 *data, + size_t count) { struct serdev_device *serdev = ctrl->serdev; @@ -200,13 +204,13 @@ void serdev_device_close(struct serdev_device *); int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); -int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); +int serdev_device_write_buf(struct serdev_device *, const u8 *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); -int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, long); +ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -244,7 +248,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline int serdev_device_write_buf(struct serdev_device *serdev, - const unsigned char *buf, + const u8 *buf, size_t count) { return -ENODEV; @@ -262,8 +266,9 @@ static inline int serdev_device_break_ctl(struct serdev_device *serdev, int brea { return -EOPNOTSUPP; } -static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf, - size_t count, unsigned long timeout) +static inline ssize_t serdev_device_write(struct serdev_device *sdev, + const u8 *buf, size_t count, + unsigned long timeout) { return -ENODEV; } @@ -311,11 +316,13 @@ struct tty_driver; #ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT struct device *serdev_tty_port_register(struct tty_port *port, + struct device *host, struct device *parent, struct tty_driver *drv, int idx); int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, + struct device *host, struct device *parent, struct tty_driver *drv, int idx) { diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index be65de65fe61..fd59ed2cca53 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -210,6 +210,12 @@ int serial8250_console_exit(struct uart_port *port); void serial8250_set_isa_configurator(void (*v)(int port, struct uart_port *up, u32 *capabilities)); +#ifdef CONFIG_SERIAL_8250_CONSOLE +extern int hp300_setup_serial_console(void) __init; +#else +static inline int hp300_setup_serial_console(void) { return 0; } +#endif + #ifdef CONFIG_SERIAL_8250_RT288X int rt288x_setup(struct uart_port *p); int au_platform_setup(struct plat_serial8250_port *p); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index a156d2ed8d9e..0a0f6e21d40e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -467,9 +467,10 @@ struct uart_port { unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ + unsigned char iotype; /* io access style */ - unsigned char quirks; /* internal quirks */ +#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ @@ -479,7 +480,9 @@ struct uart_port { #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - /* quirks must be updated while holding port mutex */ + unsigned char quirks; /* internal quirks */ + + /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ @@ -570,7 +573,7 @@ struct uart_port { struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ - unsigned int sysrq_ch; /* char for sysrq */ + u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ @@ -588,6 +591,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); @@ -669,8 +751,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -698,7 +789,8 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ + __port->ops->tx_empty(__port)) \ __port->ops->stop_tx(__port); \ } \ \ @@ -733,7 +825,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -747,8 +839,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ @@ -773,9 +878,9 @@ static inline unsigned long uart_fifo_timeout(struct uart_port *port) } /* Base timer interval for polling */ -static inline int uart_poll_timeout(struct uart_port *port) +static inline unsigned long uart_poll_timeout(struct uart_port *port) { - int timeout = uart_fifo_timeout(port); + unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } @@ -858,6 +963,8 @@ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); +int uart_read_port_properties(struct uart_port *port); +int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); @@ -904,16 +1011,16 @@ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, - unsigned int overrun, unsigned int ch, unsigned int flag); + unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) -bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); +bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); -static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; @@ -932,7 +1039,7 @@ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch return 0; } -static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; @@ -953,17 +1060,17 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int c static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - int sysrq_ch; + u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock(&port->lock); + uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock(&port->lock); + uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -972,38 +1079,38 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - int sysrq_ch; + u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ -static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } -static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock(&port->lock); + uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..c55bef0538e5 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -2,12 +2,12 @@ #ifndef _LINUX_SHM_H_ #define _LINUX_SHM_H_ -#include <linux/list.h> +#include <linux/types.h> #include <asm/page.h> -#include <uapi/linux/shm.h> #include <asm/shmparam.h> struct file; +struct task_struct; #ifdef CONFIG_SYSVIPC struct sysv_shm { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 9029abd29b1c..3fb18f7eb73e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -13,20 +13,32 @@ /* inode in-kernel data */ +#ifdef CONFIG_TMPFS_QUOTA +#define SHMEM_MAXQUOTAS 2 +#endif + struct shmem_inode_info { spinlock_t lock; unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ - pgoff_t fallocend; /* highest fallocate endindex */ - struct list_head shrinklist; /* shrinkable hpage inodes */ - struct list_head swaplist; /* chain of maybes on swap */ + union { + struct offset_ctx dir_offsets; /* stable directory offsets */ + struct { + struct list_head shrinklist; /* shrinkable hpage inodes */ + struct list_head swaplist; /* chain of maybes on swap */ + }; + }; + struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ + pgoff_t fallocend; /* highest fallocate endindex */ + unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ - struct timespec64 i_crtime; /* file creation time */ - unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */ +#ifdef CONFIG_TMPFS_QUOTA + struct dquot __rcu *i_dquot[MAXQUOTAS]; +#endif struct inode vfs_inode; }; @@ -35,11 +47,18 @@ struct shmem_inode_info { (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) #define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) +struct shmem_quota_limits { + qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */ + qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */ + qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */ + qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */ +}; + struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ unsigned long max_inodes; /* How many inodes are allowed */ - unsigned long free_inodes; /* How many are left for allocation */ + unsigned long free_ispace; /* How much ispace left for allocation */ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ @@ -53,6 +72,7 @@ struct shmem_sb_info { spinlock_t shrinklist_lock; /* Protects shrinklist */ struct list_head shrinklist; /* List of shinkable inodes */ unsigned long shrinklist_len; /* Length of shrinklist */ + struct shmem_quota_limits qlimits; /* Default quota limits */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) @@ -77,11 +97,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -extern const struct address_space_operations shmem_aops; -static inline bool shmem_mapping(struct address_space *mapping) -{ - return mapping->a_ops == &shmem_aops; -} +bool shmem_mapping(struct address_space *mapping); #else static inline bool shmem_mapping(struct address_space *mapping) { @@ -94,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags); +#else +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags) +{ + return false; +} +#endif + #ifdef CONFIG_SHMEM extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); #else @@ -172,4 +197,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd, #endif /* CONFIG_SHMEM */ #endif /* CONFIG_USERFAULTFD */ +/* + * Used space is stored as unsigned 64-bit value in bytes but + * quota core supports only signed 64-bit values so use that + * as a limit + */ +#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */ +#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL + +#ifdef CONFIG_TMPFS_QUOTA +extern const struct dquot_operations shmem_quota_operations; +extern struct quota_format_type shmem_quota_format; +#endif /* CONFIG_TMPFS_QUOTA */ + #endif diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 224293b2dd06..1a00be90d93a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,25 @@ #include <linux/atomic.h> #include <linux/types.h> +#include <linux/refcount.h> +#include <linux/completion.h> + +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. + */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; /* * This struct is used to pass information from page reclaim to the shrinkers. @@ -70,6 +89,19 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. + */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -86,48 +118,39 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); +__printf(2, 3) +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); -#ifdef CONFIG_SHRINKER_DEBUG -extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id); -extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id); -extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, - const char *fmt, ...); -#else /* CONFIG_SHRINKER_DEBUG */ -static inline int shrinker_debugfs_add(struct shrinker *shrinker) -{ - return 0; -} -static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id) +static inline bool shrinker_try_get(struct shrinker *shrinker) { - *debugfs_id = -1; - return NULL; + return refcount_inc_not_zero(&shrinker->refcount); } -static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id) + +static inline void shrinker_put(struct shrinker *shrinker) { + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); } + +#ifdef CONFIG_SHRINKER_DEBUG +extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, + const char *fmt, ...); +#else /* CONFIG_SHRINKER_DEBUG */ static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) { diff --git a/include/linux/signal.h b/include/linux/signal.h index 3b98e7a28538..f19816832f05 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -3,6 +3,7 @@ #define _LINUX_SIGNAL_H #include <linux/bug.h> +#include <linux/list.h> #include <linux/signal_types.h> #include <linux/string.h> diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index a70b2bdbf4d9..caf4f7a59ab9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -6,7 +6,7 @@ * Basic signal handling related data type definitions: */ -#include <linux/list.h> +#include <linux/types.h> #include <uapi/linux/signal.h> typedef struct kernel_siginfo { diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 84aa448d8bb3..c3a00b967d18 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -47,8 +47,17 @@ #define SZ_8G _AC(0x200000000, ULL) #define SZ_16G _AC(0x400000000, ULL) #define SZ_32G _AC(0x800000000, ULL) +#define SZ_64G _AC(0x1000000000, ULL) +#define SZ_128G _AC(0x2000000000, ULL) +#define SZ_256G _AC(0x4000000000, ULL) +#define SZ_512G _AC(0x8000000000, ULL) #define SZ_1T _AC(0x10000000000, ULL) +#define SZ_2T _AC(0x20000000000, ULL) +#define SZ_4T _AC(0x40000000000, ULL) +#define SZ_8T _AC(0x80000000000, ULL) +#define SZ_16T _AC(0x100000000000, ULL) +#define SZ_32T _AC(0x200000000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 91ed66952580..4ff48eda3f64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,12 +32,12 @@ #include <linux/if_packet.h> #include <linux/llist.h> #include <net/flow.h> -#include <net/page_pool.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/netmem.h> /** * DOC: skb checksums @@ -296,7 +296,7 @@ struct nf_bridge_info { u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; - struct net_device *physindev; + int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; @@ -360,7 +360,11 @@ extern int sysctl_max_skb_frags; */ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -368,7 +372,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -378,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -388,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -398,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -418,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -441,8 +445,6 @@ static inline bool skb_frag_must_loop(struct page *p) copied += p_len, p++, p_off = 0, \ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ -#define HAVE_HW_TIME_STAMP - /** * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration @@ -569,6 +571,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -581,7 +592,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; @@ -739,13 +753,10 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) - * @sp: the security path, used for xfrm * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header @@ -779,7 +790,6 @@ typedef unsigned char *sk_buff_data_t; * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash @@ -863,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; @@ -944,7 +951,7 @@ struct sk_buff { __u8 __mono_tc_offset[0]; /* public: */ __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; #endif @@ -993,7 +1000,7 @@ struct sk_buff { __u8 csum_not_inet:1; #endif -#ifdef CONFIG_NET_SCHED +#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1060,7 +1067,7 @@ struct sk_buff { refcount_t users; #ifdef CONFIG_SKB_EXTENSIONS - /* only useable after checking ->active_extensions != 0 */ + /* only usable after checking ->active_extensions != 0 */ struct skb_ext *extensions; #endif }; @@ -1225,6 +1232,24 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -1259,7 +1284,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, @@ -1312,7 +1336,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2019,7 +2043,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** @@ -2422,22 +2446,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2453,10 +2492,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2465,10 +2504,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page = netmem_to_page(netmem); + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2476,7 +2517,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2496,8 +2550,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2521,8 +2574,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -2635,6 +2696,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += len; return skb->data; @@ -2643,6 +2706,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2667,6 +2732,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -2839,6 +2906,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; @@ -2959,6 +3031,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb) } } +/* Move the full mac header up to current network_header. + * Leaves skb->data pointing at offset skb->mac_len into the mac_header. + * Must be provided the complete mac header length. + */ +static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -full_mac_len); + memmove(skb_mac_header(skb), old_mac, full_mac_len); + __skb_push(skb, full_mac_len - skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); @@ -2976,6 +3063,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } @@ -3152,22 +3240,38 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) } /** - * __skb_queue_purge - empty a list + * __skb_queue_purge_reason - empty a list * @list: list to empty + * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -static inline void __skb_queue_purge(struct sk_buff_head *list) +static inline void __skb_queue_purge_reason(struct sk_buff_head *list, + enum skb_drop_reason reason) { struct sk_buff *skb; + while ((skb = __skb_dequeue(list)) != NULL) - kfree_skb(skb); + kfree_skb_reason(skb, reason); +} + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); +} + +void skb_queue_purge_reason(struct sk_buff_head *list, + enum skb_drop_reason reason); + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } -void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); +void skb_errqueue_purge(struct sk_buff_head *list); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); @@ -3286,7 +3390,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. - * 1. This is for device Rx, therefor a cold page is preferred. + * 1. This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page @@ -3355,7 +3459,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3365,7 +3469,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3375,7 +3479,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3386,7 +3490,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; + fragto->offset = fragfrom->offset; } /** @@ -3397,7 +3501,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; + return netmem_to_page(frag->netmem); } /** @@ -3423,13 +3527,29 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); +bool napi_pp_put_page(struct page *page, bool napi_safe); + +static inline void +skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe) +{ +#ifdef CONFIG_PAGE_POOL + if (skb->pp_recycle && napi_pp_put_page(page, napi_safe)) + return; +#endif + put_page(page); +} + static inline void napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) { struct page *page = skb_frag_page(frag); #ifdef CONFIG_PAGE_POOL - if (recycle && page_pool_return_skb_page(page, napi_safe)) + if (recycle && napi_pp_put_page(page, napi_safe)) return; #endif put_page(page); @@ -3499,7 +3619,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); @@ -3664,6 +3784,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { @@ -3979,6 +4102,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); @@ -4023,7 +4147,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len, if (likely(hlen - offset >= len)) return (void *)data + offset; - if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; @@ -4036,6 +4160,14 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) skb_headlen(skb), buffer); } +static inline void * __must_check +skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) +{ + if (likely(skb_headlen(skb) - offset >= len)) + return skb->data + offset; + return NULL; +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. @@ -4209,10 +4341,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient varaiant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4232,11 +4367,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c1637515a8a4..a509caf823d6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -100,12 +100,18 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); + /* psock_update_sk_prot may be called with restore=false many times + * so the handler must be safe for this case. It will be called + * exactly once with restore=true when the psock is being destroyed + * and psock refcnt is zero, but before an RCU grace period. + */ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; + struct sock *sk_pair; struct rcu_work rwork; }; @@ -455,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, @@ -499,12 +507,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/slab.h b/include/linux/slab.h index 848c7c82ad5a..739b21262507 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -19,30 +19,71 @@ #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> +#include <linux/hash.h> + +enum _slab_flag_bits { + _SLAB_CONSISTENCY_CHECKS, + _SLAB_RED_ZONE, + _SLAB_POISON, + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_CACHE_DMA, + _SLAB_CACHE_DMA32, + _SLAB_STORE_USER, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_TRACE, +#ifdef CONFIG_DEBUG_OBJECTS + _SLAB_DEBUG_OBJECTS, +#endif + _SLAB_NOLEAKTRACE, + _SLAB_NO_MERGE, +#ifdef CONFIG_FAILSLAB + _SLAB_FAILSLAB, +#endif +#ifdef CONFIG_MEMCG_KMEM + _SLAB_ACCOUNT, +#endif +#ifdef CONFIG_KASAN_GENERIC + _SLAB_KASAN, +#endif + _SLAB_NO_USER_FLAGS, +#ifdef CONFIG_KFENCE + _SLAB_SKIP_KFENCE, +#endif +#ifndef CONFIG_SLUB_TINY + _SLAB_RECLAIM_ACCOUNT, +#endif + _SLAB_OBJECT_POISON, + _SLAB_CMPXCHG_DOUBLE, + _SLAB_FLAGS_LAST_BIT +}; +#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). - * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. + * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) +#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ -#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) +#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ -#define SLAB_POISON ((slab_flags_t __force)0x00000800U) +#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ -#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) +#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /* Align objs on cache lines */ -#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ -#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ -#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) +#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) +#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ -#define SLAB_PANIC ((slab_flags_t __force)0x00040000U) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -94,21 +135,19 @@ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) -/* Spread some memory over cpuset */ -#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ -#define SLAB_TRACE ((slab_flags_t __force)0x00200000U) +#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else -# define SLAB_DEBUG_OBJECTS 0 +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ -#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) +#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. This flag should be used @@ -120,25 +159,25 @@ * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ -#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U) +#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) +# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else -# define SLAB_FAILSLAB 0 +# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ #ifdef CONFIG_MEMCG_KMEM -# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else -# define SLAB_ACCOUNT 0 +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC -#define SLAB_KASAN ((slab_flags_t __force)0x08000000U) +#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else -#define SLAB_KASAN 0 +#define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* @@ -146,20 +185,20 @@ * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ -#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE -#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else -#define SLAB_SKIP_KFENCE 0 +#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #ifndef CONFIG_SLUB_TINY -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ @@ -227,7 +266,7 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); -DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) +DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) /** * ksize - Report actual allocation size of associated object @@ -244,8 +283,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) size_t ksize(const void *objp); #ifdef CONFIG_PRINTK -bool kmem_valid_obj(void *object); -void kmem_dump_obj(void *object); +bool kmem_dump_obj(void *object); +#else +static inline bool kmem_dump_obj(void *object) { return false; } #endif /* @@ -300,25 +340,15 @@ static inline unsigned int arch_slab_minalign(void) * Kmalloc array related definitions */ -#ifdef CONFIG_SLAB /* - * SLAB and SLUB directly allocates requests fitting in to an order-1 page + * SLUB directly allocates requests fitting in to an order-1 page * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) -#ifndef KMALLOC_SHIFT_LOW -#define KMALLOC_SHIFT_LOW 5 -#endif -#endif - -#ifdef CONFIG_SLUB -#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif -#endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) @@ -345,6 +375,12 @@ static inline unsigned int arch_slab_minalign(void) #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies +#else +#define RANDOM_KMALLOC_CACHES_NR 0 +#endif + /* * Whenever changing this, take care of that kmalloc_type() and * create_kmalloc_caches() still work as intended. @@ -361,6 +397,8 @@ enum kmalloc_cache_type { #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, #endif + KMALLOC_RANDOM_START = KMALLOC_NORMAL, + KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, #ifdef CONFIG_SLUB_TINY KMALLOC_RECLAIM = KMALLOC_NORMAL, #else @@ -386,14 +424,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) -static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) +extern unsigned long random_kmalloc_seed; + +static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) { /* * The most common case is KMALLOC_NORMAL, so test for it * with a single branch for all the relevant flags. */ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) +#ifdef CONFIG_RANDOM_KMALLOC_CACHES + /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ + return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, + ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); +#else return KMALLOC_NORMAL; +#endif /* * At least one of the flags has to be set. Their priorities in @@ -580,7 +626,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) index = kmalloc_index(size); return kmalloc_trace( - kmalloc_caches[kmalloc_type(flags)][index], + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } return __kmalloc(size, flags); @@ -596,7 +642,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla index = kmalloc_index(size); return kmalloc_node_trace( - kmalloc_caches[kmalloc_type(flags)][index], + kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } return __kmalloc_node(size, flags, node); @@ -746,6 +792,8 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); extern void kvfree(const void *addr); +DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); @@ -768,12 +816,4 @@ size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); -#if defined(CONFIG_SMP) && defined(CONFIG_SLAB) -int slab_prepare_cpu(unsigned int cpu); -int slab_dead_cpu(unsigned int cpu); -#else -#define slab_prepare_cpu NULL -#define slab_dead_cpu NULL -#endif - #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h deleted file mode 100644 index a61e7d55d0d3..000000000000 --- a/include/linux/slab_def.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SLAB_DEF_H -#define _LINUX_SLAB_DEF_H - -#include <linux/kfence.h> -#include <linux/reciprocal_div.h> - -/* - * Definitions unique to the original Linux SLAB allocator. - */ - -struct kmem_cache { - struct array_cache __percpu *cpu_cache; - -/* 1) Cache tunables. Protected by slab_mutex */ - unsigned int batchcount; - unsigned int limit; - unsigned int shared; - - unsigned int size; - struct reciprocal_value reciprocal_buffer_size; -/* 2) touched by every alloc & free from the backend */ - - slab_flags_t flags; /* constant flags */ - unsigned int num; /* # of objs per slab */ - -/* 3) cache_grow/shrink */ - /* order of pgs per slab (2^n) */ - unsigned int gfporder; - - /* force GFP flags, e.g. GFP_DMA */ - gfp_t allocflags; - - size_t colour; /* cache colouring range */ - unsigned int colour_off; /* colour offset */ - unsigned int freelist_size; - - /* constructor func */ - void (*ctor)(void *obj); - -/* 4) cache creation/removal */ - const char *name; - struct list_head list; - int refcount; - int object_size; - int align; - -/* 5) statistics */ -#ifdef CONFIG_DEBUG_SLAB - unsigned long num_active; - unsigned long num_allocations; - unsigned long high_mark; - unsigned long grown; - unsigned long reaped; - unsigned long errors; - unsigned long max_freeable; - unsigned long node_allocs; - unsigned long node_frees; - unsigned long node_overflow; - atomic_t allochit; - atomic_t allocmiss; - atomic_t freehit; - atomic_t freemiss; - - /* - * If debugging is enabled, then the allocator can add additional - * fields and/or padding to every object. 'size' contains the total - * object size including these internal fields, while 'obj_offset' - * and 'object_size' contain the offset to the user object and its - * size. - */ - int obj_offset; -#endif /* CONFIG_DEBUG_SLAB */ - -#ifdef CONFIG_KASAN_GENERIC - struct kasan_cache kasan_info; -#endif - -#ifdef CONFIG_SLAB_FREELIST_RANDOM - unsigned int *random_seq; -#endif - -#ifdef CONFIG_HARDENED_USERCOPY - unsigned int useroffset; /* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ -#endif - - struct kmem_cache_node *node[MAX_NUMNODES]; -}; - -static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, - void *x) -{ - void *object = x - (x - slab->s_mem) % cache->size; - void *last_object = slab->s_mem + (cache->num - 1) * cache->size; - - if (unlikely(object > last_object)) - return last_object; - else - return object; -} - -/* - * We want to avoid an expensive divide : (offset / cache->size) - * Using the fact that size is a constant for a particular cache, - * we can replace (offset / cache->size) by - * reciprocal_divide(offset, cache->reciprocal_buffer_size) - */ -static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) -{ - u32 offset = (obj - slab->s_mem); - return reciprocal_divide(offset, cache->reciprocal_buffer_size); -} - -static inline int objs_per_slab(const struct kmem_cache *cache, - const struct slab *slab) -{ - if (is_kfence_address(slab_address(slab))) - return 1; - return cache->num; -} - -#endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h index 12c9719b2a55..3042385b7b40 100644 --- a/include/linux/slimbus.h +++ b/include/linux/slimbus.h @@ -10,7 +10,7 @@ #include <linux/completion.h> #include <linux/mod_devicetable.h> -extern struct bus_type slimbus_bus; +extern const struct bus_type slimbus_bus; /** * struct slim_eaddr - Enumeration address for a SLIMbus device diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h deleted file mode 100644 index deb90cf4bffb..000000000000 --- a/include/linux/slub_def.h +++ /dev/null @@ -1,204 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SLUB_DEF_H -#define _LINUX_SLUB_DEF_H - -/* - * SLUB : A Slab allocator without object queues. - * - * (C) 2007 SGI, Christoph Lameter - */ -#include <linux/kfence.h> -#include <linux/kobject.h> -#include <linux/reciprocal_div.h> -#include <linux/local_lock.h> - -enum stat_item { - ALLOC_FASTPATH, /* Allocation from cpu slab */ - ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ - FREE_FASTPATH, /* Free to cpu slab */ - FREE_SLOWPATH, /* Freeing not to cpu slab */ - FREE_FROZEN, /* Freeing to frozen slab */ - FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ - FREE_REMOVE_PARTIAL, /* Freeing removes last object */ - ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */ - ALLOC_SLAB, /* Cpu slab acquired from page allocator */ - ALLOC_REFILL, /* Refill cpu slab from slab freelist */ - ALLOC_NODE_MISMATCH, /* Switching cpu slab */ - FREE_SLAB, /* Slab freed to the page allocator */ - CPUSLAB_FLUSH, /* Abandoning of the cpu slab */ - DEACTIVATE_FULL, /* Cpu slab was full when deactivated */ - DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */ - DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ - DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ - DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ - DEACTIVATE_BYPASS, /* Implicit deactivation */ - ORDER_FALLBACK, /* Number of times fallback was necessary */ - CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */ - CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */ - CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */ - CPU_PARTIAL_FREE, /* Refill cpu partial on free */ - CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */ - CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ - NR_SLUB_STAT_ITEMS -}; - -#ifndef CONFIG_SLUB_TINY -/* - * When changing the layout, make sure freelist and tid are still compatible - * with this_cpu_cmpxchg_double() alignment requirements. - */ -struct kmem_cache_cpu { - union { - struct { - void **freelist; /* Pointer to next available object */ - unsigned long tid; /* Globally unique transaction id */ - }; - freelist_aba_t freelist_tid; - }; - struct slab *slab; /* The slab from which we are allocating */ -#ifdef CONFIG_SLUB_CPU_PARTIAL - struct slab *partial; /* Partially allocated frozen slabs */ -#endif - local_lock_t lock; /* Protects the fields above */ -#ifdef CONFIG_SLUB_STATS - unsigned stat[NR_SLUB_STAT_ITEMS]; -#endif -}; -#endif /* CONFIG_SLUB_TINY */ - -#ifdef CONFIG_SLUB_CPU_PARTIAL -#define slub_percpu_partial(c) ((c)->partial) - -#define slub_set_percpu_partial(c, p) \ -({ \ - slub_percpu_partial(c) = (p)->next; \ -}) - -#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c)) -#else -#define slub_percpu_partial(c) NULL - -#define slub_set_percpu_partial(c, p) - -#define slub_percpu_partial_read_once(c) NULL -#endif // CONFIG_SLUB_CPU_PARTIAL - -/* - * Word size structure that can be atomically updated or read and that - * contains both the order and the number of objects that a slab of the - * given order would contain. - */ -struct kmem_cache_order_objects { - unsigned int x; -}; - -/* - * Slab cache management. - */ -struct kmem_cache { -#ifndef CONFIG_SLUB_TINY - struct kmem_cache_cpu __percpu *cpu_slab; -#endif - /* Used for retrieving partial slabs, etc. */ - slab_flags_t flags; - unsigned long min_partial; - unsigned int size; /* The size of an object including metadata */ - unsigned int object_size;/* The size of an object without metadata */ - struct reciprocal_value reciprocal_size; - unsigned int offset; /* Free pointer offset */ -#ifdef CONFIG_SLUB_CPU_PARTIAL - /* Number of per cpu partial objects to keep around */ - unsigned int cpu_partial; - /* Number of per cpu partial slabs to keep around */ - unsigned int cpu_partial_slabs; -#endif - struct kmem_cache_order_objects oo; - - /* Allocation and freeing of slabs */ - struct kmem_cache_order_objects min; - gfp_t allocflags; /* gfp flags to use on each alloc */ - int refcount; /* Refcount for slab cache destroy */ - void (*ctor)(void *); - unsigned int inuse; /* Offset to metadata */ - unsigned int align; /* Alignment */ - unsigned int red_left_pad; /* Left redzone padding size */ - const char *name; /* Name (only for display!) */ - struct list_head list; /* List of slab caches */ -#ifdef CONFIG_SYSFS - struct kobject kobj; /* For sysfs */ -#endif -#ifdef CONFIG_SLAB_FREELIST_HARDENED - unsigned long random; -#endif - -#ifdef CONFIG_NUMA - /* - * Defragmentation by allocating from a remote node. - */ - unsigned int remote_node_defrag_ratio; -#endif - -#ifdef CONFIG_SLAB_FREELIST_RANDOM - unsigned int *random_seq; -#endif - -#ifdef CONFIG_KASAN_GENERIC - struct kasan_cache kasan_info; -#endif - -#ifdef CONFIG_HARDENED_USERCOPY - unsigned int useroffset; /* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ -#endif - - struct kmem_cache_node *node[MAX_NUMNODES]; -}; - -#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) -#define SLAB_SUPPORTS_SYSFS -void sysfs_slab_unlink(struct kmem_cache *); -void sysfs_slab_release(struct kmem_cache *); -#else -static inline void sysfs_slab_unlink(struct kmem_cache *s) -{ -} -static inline void sysfs_slab_release(struct kmem_cache *s) -{ -} -#endif - -void *fixup_red_left(struct kmem_cache *s, void *p); - -static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, - void *x) { - void *object = x - (x - slab_address(slab)) % cache->size; - void *last_object = slab_address(slab) + - (slab->objects - 1) * cache->size; - void *result = (unlikely(object > last_object)) ? last_object : object; - - result = fixup_red_left(cache, result); - return result; -} - -/* Determine object index from a given position */ -static inline unsigned int __obj_to_index(const struct kmem_cache *cache, - void *addr, void *obj) -{ - return reciprocal_divide(kasan_reset_tag(obj) - addr, - cache->reciprocal_size); -} - -static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) -{ - if (is_kfence_address(obj)) - return 0; - return __obj_to_index(cache, slab_address(slab), obj); -} - -static inline int objs_per_slab(const struct kmem_cache *cache, - const struct slab *slab) -{ - return slab->objects; -} -#endif /* _LINUX_SLUB_DEF_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca..fcd61dfe2af3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* * Cpus stopping functions in panic. All have default weak definitions. @@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); } +/* + * Architecture specific boot CPU setup. Defined as empty weak function in + * init/main.c. Architectures can override it. + */ +void smp_prepare_boot_cpu(void); + #ifdef CONFIG_SMP #include <linux/preempt.h> @@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -/* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. - */ -void smp_prepare_boot_cpu(void); - extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); @@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info) (up_smp_call_function(func, info)) static inline void smp_send_reschedule(int cpu) { } -#define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void call_function_init(void) { } @@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#define setup_max_cpus 0 + #ifdef CONFIG_UP_LATE_INIT extern void __init up_late_init(void); static inline void smp_init(void) { up_late_init(); } @@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable. */ #ifndef __smp_processor_id -#define __smp_processor_id(x) raw_smp_processor_id(x) +#define __smp_processor_id() raw_smp_processor_id() #endif #ifdef CONFIG_DEBUG_PREEMPT diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h index e1c88627755a..1a6a851d2cf8 100644 --- a/include/linux/smscphy.h +++ b/include/linux/smscphy.h @@ -38,4 +38,38 @@ int smsc_phy_set_tunable(struct phy_device *phydev, struct ethtool_tunable *tuna, const void *data); int smsc_phy_probe(struct phy_device *phydev); +#define MII_LAN874X_PHY_MMD_WOL_WUCSR 0x8010 +#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGA 0x8011 +#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGB 0x8012 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK0 0x8021 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK1 0x8022 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK2 0x8023 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK3 0x8024 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK4 0x8025 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK5 0x8026 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK6 0x8027 +#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK7 0x8028 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRA 0x8061 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRB 0x8062 +#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRC 0x8063 +#define MII_LAN874X_PHY_MMD_MCFGR 0x8064 + +#define MII_LAN874X_PHY_PME1_SET (2 << 13) +#define MII_LAN874X_PHY_PME2_SET (2 << 11) +#define MII_LAN874X_PHY_PME_SELF_CLEAR BIT(9) +#define MII_LAN874X_PHY_WOL_PFDA_FR BIT(7) +#define MII_LAN874X_PHY_WOL_WUFR BIT(6) +#define MII_LAN874X_PHY_WOL_MPR BIT(5) +#define MII_LAN874X_PHY_WOL_BCAST_FR BIT(4) +#define MII_LAN874X_PHY_WOL_PFDAEN BIT(3) +#define MII_LAN874X_PHY_WOL_WUEN BIT(2) +#define MII_LAN874X_PHY_WOL_MPEN BIT(1) +#define MII_LAN874X_PHY_WOL_BCSTEN BIT(0) + +#define MII_LAN874X_PHY_WOL_FILTER_EN BIT(15) +#define MII_LAN874X_PHY_WOL_FILTER_MCASTTEN BIT(9) +#define MII_LAN874X_PHY_WOL_FILTER_BCSTEN BIT(8) + +#define MII_LAN874X_PHY_PME_SELF_CLEAR_DELAY 0x1000 /* 81 milliseconds */ + #endif /* __LINUX_SMSCPHY_H__ */ diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index fc456f75c131..8c9ca857ccf6 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -161,24 +161,6 @@ int apple_rtkit_send_message(struct apple_rtkit *rtk, u8 ep, u64 message, struct completion *completion, bool atomic); /* - * Send a message to the given endpoint and wait until it has been submitted - * to the hardware FIFO. - * Will return zero on success and a negative error code on failure - * (e.g. -ETIME when the message couldn't be written within the given - * timeout) - * - * @rtk: RTKit reference - * @ep: target endpoint - * @message: message to be sent - * @timeout: timeout in milliseconds to allow the message transmission - * to be completed - * @atomic: if set to true this function can be called from atomic - * context. - */ -int apple_rtkit_send_message_wait(struct apple_rtkit *rtk, u8 ep, u64 message, - unsigned long timeout, bool atomic); - -/* * Process incoming messages in atomic context. * This only guarantees that messages arrive as far as the recv_message_early * callback; drivers expecting to handle incoming messages synchronously diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 07f67b3d8e97..6c6cccc848f4 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,47 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8365_INFRA_TOPAXI_PROTECTEN_STA1 0x228 +#define MT8365_INFRA_TOPAXI_PROTECTEN_SET 0x2a0 +#define MT8365_INFRA_TOPAXI_PROTECTEN_CLR 0x2a4 +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM_M0 BIT(1) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MDMCU_M1 BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_0 BIT(10) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_1 BIT(11) +#define MT8365_INFRA_TOPAXI_PROTECTEN_AP2CONN_AHB BIT(13) +#define MT8365_INFRA_TOPAXI_PROTECTEN_CONN2INFRA_AHB BIT(14) +#define MT8365_INFRA_TOPAXI_PROTECTEN_MFG_M0 BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_INFRA2MFG BIT(22) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_STA1 0x258 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_SET 0x2a8 +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CLR 0x2ac +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU2AP BIT(2) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_0 BIT(16) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_1 BIT(17) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CONN2INFRA_AXI_GALS_MST BIT(18) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CAM2MM_AXI_GALS_MST BIT(19) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU_CBIP_GALS_MST BIT(20) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_INFRA2CONN_AHB_GALS_SLV BIT(21) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_INFRA_GALS_ADB BIT(24) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_MP1_L2C_AFIFO BIT(27) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_AUDIO_M BIT(28) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_M BIT(30) +#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_S BIT(31) + +#define MT8365_INFRA_NAO_TOPAXI_SI0_STA 0x0 +#define MT8365_INFRA_NAO_TOPAXI_SI0_CTRL_UPDATED BIT(24) +#define MT8365_INFRA_NAO_TOPAXI_SI2_STA 0x28 +#define MT8365_INFRA_NAO_TOPAXI_SI2_CTRL_UPDATED BIT(14) +#define MT8365_INFRA_TOPAXI_SI0_CTL 0x200 +#define MT8365_INFRA_TOPAXI_SI0_WAY_EN_MMAPB_S BIT(6) +#define MT8365_INFRA_TOPAXI_SI2_CTL 0x234 +#define MT8365_INFRA_TOPAXI_SI2_WAY_EN_PERI_M1 BIT(5) + +#define MT8365_SMI_COMMON_CLAMP_EN 0x3c0 +#define MT8365_SMI_COMMON_CLAMP_EN_SET 0x3c4 +#define MT8365_SMI_COMMON_CLAMP_EN_CLR 0x3c8 + #define MT8195_TOP_AXI_PROT_EN_STA1 0x228 #define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258 #define MT8195_TOP_AXI_PROT_EN_SET 0x2a0 diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 2475ef914746..4885b065b849 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -62,6 +62,14 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_OVL_2L1, DDP_COMPONENT_OVL_2L2, DDP_COMPONENT_OVL1, + DDP_COMPONENT_PADDING0, + DDP_COMPONENT_PADDING1, + DDP_COMPONENT_PADDING2, + DDP_COMPONENT_PADDING3, + DDP_COMPONENT_PADDING4, + DDP_COMPONENT_PADDING5, + DDP_COMPONENT_PADDING6, + DDP_COMPONENT_PADDING7, DDP_COMPONENT_POSTMASK0, DDP_COMPONENT_PWM0, DDP_COMPONENT_PWM1, diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } __packed __aligned(4); @@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -94,17 +100,20 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; @@ -114,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -123,6 +139,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -131,18 +148,35 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; + + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -182,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index be98aebcb3e1..7161a3183eda 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -9,7 +9,7 @@ #include <dt-bindings/soc/qcom,apr.h> #include <dt-bindings/soc/qcom,gpr.h> -extern struct bus_type aprbus; +extern const struct bus_type aprbus; #define APR_HDR_LEN(hdr_len) ((hdr_len)/4) diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 821a19135bb6..0f038a1a0330 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -35,6 +35,7 @@ enum geni_se_protocol_type { GENI_SE_UART, GENI_SE_I2C, GENI_SE_I3C, + GENI_SE_SPI_SLAVE, }; struct geni_wrapper; @@ -73,12 +74,14 @@ struct geni_se { /* Common SE registers */ #define GENI_FORCE_DEFAULT_REG 0x20 +#define GENI_OUTPUT_CTRL 0x24 #define SE_GENI_STATUS 0x40 #define GENI_SER_M_CLK_CFG 0x48 #define GENI_SER_S_CLK_CFG 0x4c #define GENI_IF_DISABLE_RO 0x64 #define GENI_FW_REVISION_RO 0x68 #define SE_GENI_CLK_SEL 0x7c +#define SE_GENI_CFG_SEQ_START 0x84 #define SE_GENI_DMA_MODE_EN 0x258 #define SE_GENI_M_CMD0 0x600 #define SE_GENI_M_CMD_CTRL_REG 0x604 @@ -111,6 +114,9 @@ struct geni_se { /* GENI_FORCE_DEFAULT_REG fields */ #define FORCE_DEFAULT BIT(0) +/* GENI_OUTPUT_CTRL fields */ +#define GENI_IO_MUX_0_EN BIT(0) + /* GENI_STATUS fields */ #define M_GENI_CMD_ACTIVE BIT(0) #define S_GENI_CMD_ACTIVE BIT(12) @@ -130,6 +136,9 @@ struct geni_se { /* GENI_CLK_SEL fields */ #define CLK_SEL_MSK GENMASK(2, 0) +/* SE_GENI_CFG_SEQ_START fields */ +#define START_TRIGGER BIT(0) + /* SE_GENI_DMA_MODE_EN */ #define GENI_DMA_MODE_EN BIT(0) @@ -169,6 +178,7 @@ struct geni_se { #define M_GP_IRQ_3_EN BIT(12) #define M_GP_IRQ_4_EN BIT(13) #define M_GP_IRQ_5_EN BIT(14) +#define M_TX_FIFO_NOT_EMPTY_EN BIT(21) #define M_IO_DATA_DEASSERT_EN BIT(22) #define M_IO_DATA_ASSERT_EN BIT(23) #define M_RX_FIFO_RD_ERR_EN BIT(24) diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 93417ba1ead4..1a886666bbb6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -30,7 +30,7 @@ #define LLCC_NPU 23 #define LLCC_WLHW 24 #define LLCC_PIMEM 25 -#define LLCC_DRE 26 +#define LLCC_ECC 26 #define LLCC_CVP 28 #define LLCC_MODPE 29 #define LLCC_APTCM 30 diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h new file mode 100644 index 000000000000..8a46209ccf13 --- /dev/null +++ b/include/linux/soc/qcom/qcom-pbs.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _QCOM_PBS_H +#define _QCOM_PBS_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct device_node; +struct pbs_dev; + +#if IS_ENABLED(CONFIG_QCOM_PBS) +int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap); +struct pbs_dev *get_pbs_client_device(struct device *client_dev); +#else +static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap) +{ + return -ENODEV; +} + +static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h index 3c2a82e606f8..7361ca028752 100644 --- a/include/linux/soc/qcom/qcom_aoss.h +++ b/include/linux/soc/qcom/qcom_aoss.h @@ -13,13 +13,13 @@ struct qmp; #if IS_ENABLED(CONFIG_QCOM_AOSS_QMP) -int qmp_send(struct qmp *qmp, const void *data, size_t len); +int qmp_send(struct qmp *qmp, const char *fmt, ...); struct qmp *qmp_get(struct device *dev); void qmp_put(struct qmp *qmp); #else -static inline int qmp_send(struct qmp *qmp, const void *data, size_t len) +static inline int qmp_send(struct qmp *qmp, const char *fmt, ...) { return -ENODEV; } diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 2990f425fdef..8190878645f9 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -2,10 +2,13 @@ #ifndef __QCOM_SMD_RPM_H__ #define __QCOM_SMD_RPM_H__ +#include <linux/types.h> + struct qcom_smd_rpm; -#define QCOM_SMD_RPM_ACTIVE_STATE 0 -#define QCOM_SMD_RPM_SLEEP_STATE 1 +#define QCOM_SMD_RPM_ACTIVE_STATE 0 +#define QCOM_SMD_RPM_SLEEP_STATE 1 +#define QCOM_SMD_RPM_STATE_NUM 2 /* * Constants used for addressing resources in the RPM. @@ -44,6 +47,19 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d +#define QCOM_RPM_KEY_SOFTWARE_ENABLE 0x6e657773 +#define QCOM_RPM_KEY_PIN_CTRL_CLK_BUFFER_ENABLE_KEY 0x62636370 +#define QCOM_RPM_SMD_KEY_RATE 0x007a484b +#define QCOM_RPM_SMD_KEY_ENABLE 0x62616e45 +#define QCOM_RPM_SMD_KEY_STATE 0x54415453 +#define QCOM_RPM_SCALING_ENABLE_ID 0x2 + +struct clk_smd_rpm_req { + __le32 key; + __le32 nbytes; + __le32 value; +}; + int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, u32 resource_type, u32 resource_id, diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index 223db6a9c733..a36a3b9d4929 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -4,6 +4,7 @@ #define QCOM_SMEM_HOST_ANY -1 +bool qcom_smem_is_available(void); int qcom_smem_alloc(unsigned host, unsigned item, size_t size); void *qcom_smem_get(unsigned host, unsigned item, size_t *size); diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index a4f5516cc956..2bd9d12d9a52 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -10,6 +10,7 @@ #define __LINUX_SOC_EXYNOS_PMU_H struct regmap; +struct device_node; enum sys_powerdown { SYS_AFTR, @@ -20,12 +21,20 @@ enum sys_powerdown { extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); #ifdef CONFIG_EXYNOS_PMU -extern struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname); #else static inline struct regmap *exynos_get_pmu_regmap(void) { return ERR_PTR(-ENODEV); } + +static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname) +{ + return ERR_PTR(-ENODEV); +} #endif #endif /* __LINUX_SOC_EXYNOS_PMU_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 0b9ecd8cf979..110978dc9af1 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -13,6 +13,7 @@ struct nlmsghdr; struct sock; struct sock_diag_handler { + struct module *owner; __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); @@ -22,8 +23,13 @@ struct sock_diag_handler { int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +struct sock_diag_inet_compat { + struct module *owner; + int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr); +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr); u64 __sock_gen_cookie(struct sock *sk); diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..139c330ccf2c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 @@ -421,13 +422,6 @@ extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags); -extern int sendmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct iovec **iov); -extern int recvmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct sockaddr __user **uaddr, - struct iovec **iov); extern int __copy_msghdr(struct msghdr *kmsg, struct user_msghdr *umsg, struct sockaddr __user **save_addr); diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index bae5e2369b4f..317200cd3a60 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -50,11 +50,59 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, return 0; } +/* Deprecated. + * This is unsafe, unless caller checked user provided optlen. + * Prefer copy_safe_from_sockptr() instead. + */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { return copy_from_sockptr_offset(dst, src, 0, size); } +/** + * copy_safe_from_sockptr: copy a struct from sockptr + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @optval: Source address. (in user or kernel space) + * @optlen: Size of @optval data. + * + * Returns: + * * -EINVAL: @optlen < @ksize + * * -EFAULT: access to userspace failed. + * * 0 : @ksize bytes were copied + */ +static inline int copy_safe_from_sockptr(void *dst, size_t ksize, + sockptr_t optval, unsigned int optlen) +{ + if (optlen < ksize) + return -EINVAL; + return copy_from_sockptr(dst, optval, ksize); +} + +static inline int copy_struct_from_sockptr(void *dst, size_t ksize, + sockptr_t src, size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + if (!sockptr_is_kernel(src)) + return copy_struct_from_user(dst, ksize, src.user, size); + + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + char *p = src.kernel; + + while (rest--) { + if (*p++) + return -E2BIG; + } + } + memcpy(dst, src.kernel, size); + return 0; +} + static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, const void *src, size_t size) { diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index f523ceabd059..66f814b63a43 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -6,6 +6,8 @@ #include <linux/bug.h> #include <linux/lockdep_types.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/mod_devicetable.h> #include <linux/bitfield.h> @@ -370,6 +372,7 @@ struct sdw_dpn_prop { * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. + * @use_domain_irq: call actual IRQ handler on slave, as well as callback */ struct sdw_slave_prop { u32 mipi_revision; @@ -394,6 +397,7 @@ struct sdw_slave_prop { u8 scp_int1_mask; u32 quirks; bool clock_reg_supported; + bool use_domain_irq; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) @@ -482,6 +486,11 @@ struct sdw_slave_id { __u8 sdw_version:4; }; +struct sdw_extended_slave_id { + int link_id; + struct sdw_slave_id id; +}; + /* * Helper macros to extract the MIPI-defined IDs * @@ -641,6 +650,7 @@ struct sdw_slave_ops { * struct sdw_slave - SoundWire Slave * @id: MIPI device ID * @dev: Linux device + * @irq: IRQ number * @status: Status reported by the Slave * @bus: Bus handle * @prop: Slave properties @@ -670,6 +680,7 @@ struct sdw_slave_ops { struct sdw_slave { struct sdw_slave_id id; struct device dev; + int irq; enum sdw_slave_status status; struct sdw_bus *bus; struct sdw_slave_prop prop; @@ -847,6 +858,8 @@ struct sdw_defer { * @post_bank_switch: Callback for post bank switch * @read_ping_status: Read status from PING frames, reported with two bits per Device. * Bits 31:24 are reserved. + * @get_device_num: Callback for vendor-specific device_number allocation + * @put_device_num: Callback for vendor-specific device_number release * @new_peripheral_assigned: Callback to handle enumeration of new peripheral. */ struct sdw_master_ops { @@ -862,14 +875,19 @@ struct sdw_master_ops { int (*pre_bank_switch)(struct sdw_bus *bus); int (*post_bank_switch)(struct sdw_bus *bus); u32 (*read_ping_status)(struct sdw_bus *bus); - void (*new_peripheral_assigned)(struct sdw_bus *bus, int dev_num); + int (*get_device_num)(struct sdw_bus *bus, struct sdw_slave *slave); + void (*put_device_num)(struct sdw_bus *bus, struct sdw_slave *slave); + void (*new_peripheral_assigned)(struct sdw_bus *bus, + struct sdw_slave *slave, + int dev_num); }; /** * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @link_id: Link id number, can be 0 to N, unique for each Master + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller * @id: bus system-wide unique id * @slaves: list of Slaves on this bus * @assigned: Bitmap for Slave device numbers. @@ -885,6 +903,7 @@ struct sdw_master_ops { * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters * @debugfs: Bus debugfs + * @domain: IRQ domain * @defer_msg: Defer message * @clk_stop_timeout: Clock stop timeout computed * @bank_switch_timeout: Bank switch timeout computed @@ -896,13 +915,11 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. - * @dev_num_ida_min: if set, defines the minimum values for the IDA - * used to allocate system-unique device numbers. This value needs to be - * identical across all SoundWire bus in the system. */ struct sdw_bus { struct device *dev; struct sdw_master_device *md; + int controller_id; unsigned int link_id; int id; struct list_head slaves; @@ -920,12 +937,13 @@ struct sdw_bus { #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif + struct irq_chip irq_chip; + struct irq_domain *domain; struct sdw_defer defer_msg; unsigned int clk_stop_timeout; u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; - int dev_num_ida_min; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, @@ -1024,7 +1042,7 @@ int sdw_compute_params(struct sdw_bus *bus); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, @@ -1046,7 +1064,7 @@ void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); int sdw_stream_remove_slave(struct sdw_slave *slave, @@ -1068,7 +1086,7 @@ int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); static inline int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, + const struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream) { diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index ceecad74aef9..28a4eb77717f 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -1,11 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* - * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2023-24 Advanced Micro Devices, Inc. All rights reserved. */ #ifndef __SDW_AMD_H #define __SDW_AMD_H +#include <linux/acpi.h> #include <linux/soundwire/sdw.h> /* AMD pm_runtime quirk definitions */ @@ -25,6 +26,7 @@ #define AMD_SDW_POWER_OFF_MODE 2 #define ACP_SDW0 0 #define ACP_SDW1 1 +#define AMD_SDW_MAX_MANAGER_COUNT 2 struct acp_sdw_pdata { u16 instance; @@ -32,12 +34,6 @@ struct acp_sdw_pdata { struct mutex *acp_sdw_lock; }; -struct sdw_manager_reg_mask { - u32 sw_pad_enable_mask; - u32 sw_pad_pulldown_mask; - u32 acp_sdw_intr_mask; -}; - /** * struct sdw_amd_dai_runtime: AMD sdw dai runtime data * @@ -59,10 +55,8 @@ struct sdw_amd_dai_runtime { * @dev: linux device * @mmio: SoundWire registers mmio base * @acp_mmio: acp registers mmio base - * @reg_mask: register mask structure per manager instance * @amd_sdw_irq_thread: SoundWire manager irq workqueue * @amd_sdw_work: peripheral status work queue - * @probe_work: SoundWire manager probe workqueue * @acp_sdw_lock: mutex to protect acp share register access * @status: peripheral devices status array * @num_din_ports: number of input ports @@ -83,10 +77,8 @@ struct amd_sdw_manager { void __iomem *mmio; void __iomem *acp_mmio; - struct sdw_manager_reg_mask *reg_mask; struct work_struct amd_sdw_irq_thread; struct work_struct amd_sdw_work; - struct work_struct probe_work; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; @@ -106,4 +98,71 @@ struct amd_sdw_manager { struct sdw_amd_dai_runtime **dai_runtime_array; }; + +/** + * struct sdw_amd_acpi_info - Soundwire AMD information found in ACPI tables + * @handle: ACPI controller handle + * @count: maximum no of soundwire manager links supported on AMD platform. + * @link_mask: bit-wise mask listing links enabled by BIOS menu + */ +struct sdw_amd_acpi_info { + acpi_handle handle; + int count; + u32 link_mask; +}; + +/** + * struct sdw_amd_ctx - context allocated by the controller driver probe + * + * @count: link count + * @num_slaves: total number of devices exposed across all enabled links + * @link_mask: bit-wise mask listing SoundWire links reported by the + * Controller + * @ids: array of slave_id, representing Slaves exposed across all enabled + * links + * @pdev: platform device structure + */ +struct sdw_amd_ctx { + int count; + int num_slaves; + u32 link_mask; + struct sdw_extended_slave_id *ids; + struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT]; +}; + +/** + * struct sdw_amd_res - Soundwire AMD global resource structure, + * typically populated by the DSP driver/Legacy driver + * + * @addr: acp pci device resource start address + * @reg_range: ACP register range + * @link_mask: bit-wise mask listing links selected by the DSP driver/ + * legacy driver + * @count: link count + * @mmio_base: mmio base of SoundWire registers + * @handle: ACPI parent handle + * @parent: parent device + * @dev: device implementing hwparams and free callbacks + * @acp_lock: mutex protecting acp common registers access + */ +struct sdw_amd_res { + u32 addr; + u32 reg_range; + u32 link_mask; + int count; + void __iomem *mmio_base; + acpi_handle handle; + struct device *parent; + struct device *dev; + /* use to protect acp common registers access */ + struct mutex *acp_lock; +}; + +int sdw_amd_probe(struct sdw_amd_res *res, struct sdw_amd_ctx **ctx); + +void sdw_amd_exit(struct sdw_amd_ctx *ctx); + +int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx); + +int amd_sdw_scan_controller(struct sdw_amd_acpi_info *info); #endif diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 11fc88fb0d78..00bb22d96ae5 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -264,11 +264,6 @@ struct sdw_intel_link_dev; */ #define SDW_INTEL_CLK_STOP_BUS_RESET BIT(3) -struct sdw_intel_slave_id { - int link_id; - struct sdw_slave_id id; -}; - struct hdac_bus; /** @@ -298,7 +293,7 @@ struct sdw_intel_ctx { int num_slaves; acpi_handle handle; struct sdw_intel_link_dev **ldev; - struct sdw_intel_slave_id *ids; + struct sdw_extended_slave_id *ids; struct list_head link_list; struct mutex shim_lock; /* lock for access to shared SHIM registers */ u32 shim_mask; @@ -433,4 +428,11 @@ struct sdw_intel_hw_ops { extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; +/* + * IDA min selected to allow for 5 unconstrained devices per link, + * and 6 system-unique Device Numbers for wake-capable devices. + */ + +#define SDW_INTEL_DEV_NUM_IDA_MIN 6 + #endif diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index d8c27f1e5559..693320b4f5c2 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -4,9 +4,9 @@ #ifndef __SOUNDWIRE_TYPES_H #define __SOUNDWIRE_TYPES_H -extern struct bus_type sdw_bus_type; -extern struct device_type sdw_slave_type; -extern struct device_type sdw_master_type; +extern const struct bus_type sdw_bus_type; +extern const struct device_type sdw_slave_type; +extern const struct device_type sdw_master_type; static inline int is_sdw_slave(const struct device *dev) { diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 4658e7801b42..ca2cd4e30ead 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -5,6 +5,7 @@ #ifndef __LINUX_SPI_PXA2XX_SPI_H #define __LINUX_SPI_PXA2XX_SPI_H +#include <linux/dmaengine.h> #include <linux/types.h> #include <linux/pxa2xx_ssp.h> @@ -19,10 +20,10 @@ struct pxa2xx_spi_controller { u16 num_chipselect; u8 enable_dma; u8 dma_burst_size; - bool is_slave; + bool is_target; /* DMA engine specific config */ - bool (*dma_filter)(struct dma_chan *chan, void *param); + dma_filter_fn dma_filter; void *tx_param; void *rx_param; @@ -31,7 +32,7 @@ struct pxa2xx_spi_controller { }; /* - * The controller specific data for SPI slave devices + * The controller specific data for SPI target devices * (resides in spi_board_info.controller_data), * copied to spi_device.platform_data ... mostly for * DMA tuning. diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index dc2a0cbd210d..f950d280461b 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -3,8 +3,8 @@ #define __SPI_SH_MSIOF_H__ enum { - MSIOF_SPI_MASTER, - MSIOF_SPI_SLAVE, + MSIOF_SPI_HOST, + MSIOF_SPI_TARGET, }; struct sh_msiof_spi_info { diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 6b0a7dc48a4b..f866d5c8ed32 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -233,6 +233,8 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) * limitations) * @supports_op: check if an operation is supported by the controller * @exec_op: execute a SPI memory operation + * not all driver provides supports_op(), so it can return -EOPNOTSUPP + * if the op is not supported by the driver/controller * @get_name: get a custom name for the SPI mem device from the controller. * This might be needed if the controller driver has been ported * to use the SPI mem layer and a custom name is used to keep diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 32c94eae8926..c459809efee4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -6,18 +6,22 @@ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H +#include <linux/acpi.h> #include <linux/bits.h> +#include <linux/completion.h> #include <linux/device.h> -#include <linux/mod_devicetable.h> -#include <linux/slab.h> +#include <linux/gpio/consumer.h> #include <linux/kthread.h> -#include <linux/completion.h> +#include <linux/mod_devicetable.h> +#include <linux/overflow.h> #include <linux/scatterlist.h> -#include <linux/gpio/consumer.h> +#include <linux/slab.h> +#include <linux/u64_stats_sync.h> #include <uapi/linux/spi/spi.h> -#include <linux/acpi.h> -#include <linux/u64_stats_sync.h> + +/* Max no. of CS supported per spi device */ +#define SPI_CS_CNT_MAX 16 struct dma_chan; struct software_node; @@ -32,11 +36,11 @@ struct spi_message; * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, * and SPI infrastructure. */ -extern struct bus_type spi_bus_type; +extern const struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @syncp: seqcount to protect members in this struct for per-cpu udate + * @syncp: seqcount to protect members in this struct for per-cpu update * on 32-bit systems * * @messages: number of spi-messages handled @@ -55,7 +59,7 @@ extern struct bus_type spi_bus_type; * @bytes_rx: number of bytes received from device * * @transfer_bytes_histo: - * transfer bytes histogramm + * transfer bytes histogram * * @transfers_split_maxsize: * number of transfers that have been split because of @@ -127,11 +131,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. - * @master: Copy of controller, for backwards compatibility. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Chipselect, distinguishing chips handled by @controller. + * @chip_select: Array of physical chipselect, spi->chipselect[i] gives + * the corresponding physical CS for logical CS i. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden @@ -156,8 +160,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when - * not using a GPIO line) + * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines + * (optional, NULL when not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted @@ -166,6 +170,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. * @pcpu_statistics: statistics for the spi_device + * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. @@ -179,9 +184,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_device { struct device dev; struct spi_controller *controller; - struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; - u8 chip_select; + u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ @@ -212,7 +216,7 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod; /* Chip select gpio desc */ + struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; @@ -222,8 +226,15 @@ struct spi_device { /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; + /* Bit mask of the chipselect(s) that the driver need to use from + * the chipselect array.When the controller is capable to handle + * multiple chip selects & memories are connected in parallel + * then more than one bit need to be set in cs_index_mask. + */ + u32 cs_index_mask : SPI_CS_CNT_MAX; + /* - * likely need more hooks for more protocol options affecting how + * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: * - memory packing (12 bit samples into low bits, others zeroed) * - priority @@ -278,32 +289,43 @@ static inline void *spi_get_drvdata(const struct spi_device *spi) static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx) { - return spi->chip_select; + return spi->chip_select[idx]; } static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) { - spi->chip_select = chipselect; + spi->chip_select[idx] = chipselect; } static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx) { - return spi->cs_gpiod; + return spi->cs_gpiod[idx]; } static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) { - spi->cs_gpiod = csgpiod; + spi->cs_gpiod[idx] = csgpiod; +} + +static inline bool spi_is_csgpiod(struct spi_device *spi) +{ + u8 idx; + + for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + if (spi_get_csgpiod(spi, idx)) + return true; + } + return false; } /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver - * @probe: Binds this driver to the spi device. Drivers can verify + * @probe: Binds this driver to the SPI device. Drivers can verify * that the device is actually present, and may need to configure * characteristics (such as bits_per_word) which weren't needed for * the initial configuration done during system setup. - * @remove: Unbinds this driver from the spi device + * @remove: Unbinds this driver from the SPI device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec * @driver: SPI device drivers should initialize the name and owner @@ -415,7 +437,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump - * @queue_lock: spinlock to syncronise access to message queue + * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message @@ -426,9 +448,11 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA + * @fallback: fallback to PIO if DMA transfer return failure with + * SPI_TRANS_FAIL_NO_START. + * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected - * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -451,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. + * @optimize_message: optimize the message for reuse + * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. @@ -460,10 +486,13 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem - * can issue the next transfer. Note: transfer_one and - * transfer_one_message are mutually exclusive; when both - * are set, the generic subsystem does not call your - * transfer_one callback. + * can issue the next transfer. If the transfer fails, the + * driver must set the flag SPI_TRANS_FAIL_IO to + * spi_transfer->error first, before calling + * spi_finalize_current_transfer(). + * Note: transfer_one and transfer_one_message are mutually + * exclusive; when both are set, the generic subsystem does + * not call your transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. @@ -473,7 +502,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller - * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS + * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab @@ -500,8 +529,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping - * @fallback: fallback to pio if dma transfer return failure with - * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core @@ -522,15 +549,17 @@ struct spi_controller { struct list_head list; - /* Other than negative (== assign one dynamically), bus_num is fully - * board-specific. usually that simplifies to being SOC-specific. - * example: one SOC has three SPI controllers, numbered 0..2, - * and one board's schematics might show it using SPI-2. software + /* + * Other than negative (== assign one dynamically), bus_num is fully + * board-specific. Usually that simplifies to being SoC-specific. + * example: one SoC has three SPI controllers, numbered 0..2, + * and one board's schematics might show it using SPI-2. Software * would normally use bus_num=2 for that controller. */ s16 bus_num; - /* chipselects will be integral to many controllers; some others + /* + * Chipselects will be integral to many controllers; some others * might use board-specific GPIOs. */ u16 num_chipselect; @@ -562,8 +591,13 @@ struct spi_controller { #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ - -#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ + /* + * The spi-controller has multi chip select capability and can + * assert/de-assert more than one chip select at once. + */ +#define SPI_CONTROLLER_MULTI_CS BIT(7) /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; @@ -576,8 +610,8 @@ struct spi_controller { }; /* - * on some hardware transfer / message size may be constrained - * the limit may depend on device transfer settings + * On some hardware transfer / message size may be constrained + * the limit may depend on device transfer settings. */ size_t (*max_transfer_size)(struct spi_device *spi); size_t (*max_message_size)(struct spi_device *spi); @@ -595,7 +629,8 @@ struct spi_controller { /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; - /* Setup mode and clock, etc (spi driver may call many times). + /* + * Setup mode and clock, etc (SPI driver may call many times). * * IMPORTANT: this may be called when transfers to another * device are active. DO NOT UPDATE SHARED REGISTERS in ways @@ -613,18 +648,19 @@ struct spi_controller { */ int (*set_cs_timing)(struct spi_device *spi); - /* Bidirectional bulk transfers + /* + * Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. * + For now there's no remove-from-queue operation, or * any other request management - * + To a given spi_device, message queueing is pure fifo + * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue - * arbitration algorithm is unspecified (round robin, fifo, + * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) * * + Chipselect stays active during the entire message @@ -672,12 +708,15 @@ struct spi_controller { bool rt; bool auto_runtime_pm; bool cur_msg_mapped; - char last_cs; - bool last_cs_mode_high; bool fallback; + bool last_cs_mode_high; + s8 last_cs[SPI_CS_CNT_MAX]; + u32 last_cs_index_mask : SPI_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; + int (*optimize_message)(struct spi_message *msg); + int (*unoptimize_message)(struct spi_message *msg); int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); @@ -705,7 +744,7 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; - /* gpio chip select */ + /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; @@ -789,7 +828,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); -/* The spi driver core manages memory for the spi_controller classdev */ +/* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -863,6 +902,7 @@ extern int devm_spi_register_controller(struct device *dev, extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) +extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); @@ -878,13 +918,13 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, void *res); /** - * struct spi_res - spi resource management structure + * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * - * this is based on ideas from devres, but focused on life-cycle - * management during spi_message processing + * This is based on ideas from devres, but focused on life-cycle + * management during spi_message processing. */ struct spi_res { struct list_head entry; @@ -902,7 +942,7 @@ struct spi_res { * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they - * write; but one or the other is easily ignored by passing a null buffer + * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * @@ -913,8 +953,8 @@ struct spi_res { /** * struct spi_transfer - a read/write buffer pair - * @tx_buf: data to be written (dma-safe memory), or NULL - * @rx_buf: data to be read (dma-safe memory), or NULL + * @tx_buf: data to be written (DMA-safe memory), or NULL + * @rx_buf: data to be read (DMA-safe memory), or NULL * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @tx_nbits: number of bits used for writing. If 0 the default @@ -937,7 +977,7 @@ struct spi_res { * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to - * transfer this transfer. Set to 0 if the spi bus driver does + * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use @@ -966,16 +1006,16 @@ struct spi_res { * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. * @timestamped: true if the transfer has been timestamped - * @error: Error status logged by spi controller driver. + * @error: Error status logged by SPI controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. * In some cases, they may also want to provide DMA addresses for * the data being transferred; that may reduce overhead, when the - * underlying driver uses dma. + * underlying driver uses DMA. * - * If the transmit buffer is null, zeroes will be shifted out - * while filling @rx_buf. If the receive buffer is null, the data + * If the transmit buffer is NULL, zeroes will be shifted out + * while filling @rx_buf. If the receive buffer is NULL, the data * shifted in will be discarded. Only "len" bytes shift out (or in). * It's an error to try to shift out a partial word. (For example, by * shifting out three bytes with word size of sixteen or twenty bits; @@ -1009,7 +1049,7 @@ struct spi_res { * Some devices need protocol transactions to be built from a series of * spi_message submissions, where the content of one message is determined * by the results of previous messages and where the whole transaction - * ends when the chipselect goes intactive. + * ends when the chipselect goes inactive. * * When SPI can transfer in 1x,2x or 4x. It can get this transfer information * from device through @tx_nbits and @rx_nbits. In Bi-direction, these @@ -1023,16 +1063,18 @@ struct spi_res { * and its transfers, ignore them until its completion callback. */ struct spi_transfer { - /* It's ok if tx_buf == rx_buf (right?) - * for MicroWire, one buffer must be null - * buffers must work with dma_*map_single() calls, unless - * spi_message.is_dma_mapped reports a pre-existing mapping + /* + * It's okay if tx_buf == rx_buf (right?). + * For MicroWire, one buffer must be NULL. + * Buffers must work with dma_*map_single() calls, unless + * spi_message.is_dma_mapped reports a pre-existing mapping. */ const void *tx_buf; void *rx_buf; unsigned len; #define SPI_TRANS_FAIL_NO_START BIT(0) +#define SPI_TRANS_FAIL_IO BIT(1) u16 error; dma_addr_t tx_dma; @@ -1046,9 +1088,9 @@ struct spi_transfer { unsigned tx_nbits:3; unsigned rx_nbits:3; unsigned timestamped:1; -#define SPI_NBITS_SINGLE 0x01 /* 1bit transfer */ -#define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ -#define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ +#define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ +#define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ +#define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; @@ -1069,18 +1111,21 @@ struct spi_transfer { * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued - * @is_dma_mapped: if true, the caller provided both dma and cpu virtual + * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * addresses for each transfer buffer + * @pre_optimized: peripheral driver pre-optimized the message + * @optimized: the message is in the optimized state + * @prepared: spi_prepare_message was called for the this message + * @status: zero for success, else negative errno * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments - * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message - * @resources: for resource management when the spi message is processed - * @prepared: spi_prepare_message was called for the this message + * @opt_state: for use by whichever driver currently owns the message + * @resources: for resource management when the SPI message is processed * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1103,10 +1148,16 @@ struct spi_message { unsigned is_dma_mapped:1; + /* spi_optimize_message() was called for this message */ + bool pre_optimized; + /* __spi_optimize_message() was called for this message */ + bool optimized; + /* spi_prepare_message() was called for this message */ bool prepared; - /* REVISIT: we might want a flag affecting the behavior of the + /* + * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. @@ -1124,14 +1175,20 @@ struct spi_message { unsigned frame_length; unsigned actual_length; - /* For optional use by whatever driver currently owns the + /* + * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; + /* + * Optional state for use by controller driver between calls to + * __spi_optimize_message() and __spi_unoptimize_message(). + */ + void *opt_state; - /* List of spi_res reources when the spi message is processed */ + /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; @@ -1168,7 +1225,7 @@ spi_transfer_delay_exec(struct spi_transfer *t) /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized - * @xfers: An array of spi transfers + * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in @@ -1185,26 +1242,27 @@ struct spi_transfer *xfers, unsigned int num_xfers) spi_message_add_tail(&xfers[i], m); } -/* It's fine to embed message and transaction structures in other data +/* + * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. */ - static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { - struct spi_message *m; - - m = kzalloc(sizeof(struct spi_message) - + ntrans * sizeof(struct spi_transfer), - flags); - if (m) { - unsigned i; - struct spi_transfer *t = (struct spi_transfer *)(m + 1); - - spi_message_init_no_memset(m); - for (i = 0; i < ntrans; i++, t++) - spi_message_add_tail(t, m); - } - return m; + struct spi_message_with_transfers { + struct spi_message m; + struct spi_transfer t[]; + } *mwt; + unsigned i; + + mwt = kzalloc(struct_size(mwt, t, ntrans), flags); + if (!mwt) + return NULL; + + spi_message_init_no_memset(&mwt->m); + for (i = 0; i < ntrans; i++) + spi_message_add_tail(&mwt->t[i], &mwt->m); + + return &mwt->m; } static inline void spi_message_free(struct spi_message *m) @@ -1212,6 +1270,9 @@ static inline void spi_message_free(struct spi_message *m) kfree(m); } +extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); +extern void spi_unoptimize_message(struct spi_message *msg); + extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); @@ -1253,7 +1314,7 @@ spi_max_transfer_size(struct spi_device *spi) */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { - u32 bpw_mask = spi->master->bits_per_word_mask; + u32 bpw_mask = spi->controller->bits_per_word_mask; if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; @@ -1291,7 +1352,7 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, * replacements that have occurred * so that they can get reverted * @release: some extra release code to get executed prior to - * relasing this structure + * releasing this structure * @extradata: pointer to some extra data if requested or NULL * @replaced_transfers: transfers that have been replaced and which need * to get restored @@ -1301,9 +1362,9 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, * @inserted_transfers: array of spi_transfers of array-size @inserted, * that have been replacing replaced_transfers * - * note: that @extradata will point to @inserted_transfers[@inserted] + * Note: that @extradata will point to @inserted_transfers[@inserted] * if some extra allocation is requested, so alignment will be the same - * as for spi_transfers + * as for spi_transfers. */ struct spi_replaced_transfers { spi_replaced_release_t release; @@ -1320,16 +1381,15 @@ struct spi_replaced_transfers { extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxsize, - gfp_t gfp); + size_t maxsize); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxwords, - gfp_t gfp); + size_t maxwords); /*---------------------------------------------------------------------------*/ -/* All these synchronous SPI transfer routines are utilities layered +/* + * All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. */ @@ -1472,7 +1532,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) * * Callable only from contexts that can sleep. * - * Return: the (unsigned) sixteen bit number returned by the device in cpu + * Return: the (unsigned) sixteen bit number returned by the device in CPU * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) @@ -1500,7 +1560,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * As a rule, SPI devices can't be probed. Instead, board init code * provides a table listing the devices which are present, with enough * information to bind and set up the device's driver. There's basic - * support for nonstatic configurations too; enough to handle adding + * support for non-static configurations too; enough to handle adding * parport adapters, or microcontrollers acting as USB-to-SPI bridges. */ @@ -1537,12 +1597,13 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * are active in some dynamic board configuration models. */ struct spi_board_info { - /* The device name and module name are coupled, like platform_bus; + /* + * The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, - * irq is copied too + * IRQ is copied too. */ char modalias[SPI_NAME_SIZE]; const void *platform_data; @@ -1554,7 +1615,8 @@ struct spi_board_info { u32 max_speed_hz; - /* bus_num is board specific and matches the bus_num of some + /* + * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; @@ -1563,12 +1625,14 @@ struct spi_board_info { u16 bus_num; u16 chip_select; - /* mode becomes spi_device.mode, and is essential for chips + /* + * mode becomes spi_device.mode, and is essential for chips * where the default of SPI_CS_HIGH = 0 is wrong. */ u32 mode; - /* ... may need additional spi_device chip config data here. + /* + * ... may need additional spi_device chip config data here. * avoid stuff protocol drivers can set; but include stuff * needed to behave without being bound to a driver: * - quirks like clock rate mattering when not selected @@ -1585,7 +1649,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } #endif -/* If you're hotplugging an adapter with devices (parport, usb, etc) +/* + * If you're hotplugging an adapter with devices (parport, USB, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). @@ -1619,26 +1684,4 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } -/* Compatibility layer */ -#define spi_master spi_controller - -#define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX -#define SPI_MASTER_NO_RX SPI_CONTROLLER_NO_RX -#define SPI_MASTER_NO_TX SPI_CONTROLLER_NO_TX -#define SPI_MASTER_MUST_RX SPI_CONTROLLER_MUST_RX -#define SPI_MASTER_MUST_TX SPI_CONTROLLER_MUST_TX - -#define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) -#define spi_master_set_devdata(_ctlr, _data) \ - spi_controller_set_devdata(_ctlr, _data) -#define spi_master_get(_ctlr) spi_controller_get(_ctlr) -#define spi_master_put(_ctlr) spi_controller_put(_ctlr) -#define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr) -#define spi_master_resume(_ctlr) spi_controller_resume(_ctlr) - -#define spi_register_master(_ctlr) spi_register_controller(_ctlr) -#define devm_spi_register_master(_dev, _ctlr) \ - devm_spi_register_controller(_dev, _ctlr) -#define spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr) - #endif /* __LINUX_SPI_H */ diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 4444c2a992cb..b930eca2ef7b 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -10,7 +10,7 @@ struct spi_bitbang { u8 use_dma; u16 flags; /* extra spi->mode support */ - struct spi_master *master; + struct spi_controller *ctlr; /* setup_transfer() changes clock and/or wordsize to match settings * for this transfer; zeroes restore defaults from spi_device. diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h index 9e7e83d8645b..5f0e1407917a 100644 --- a/include/linux/spi/spi_gpio.h +++ b/include/linux/spi/spi_gpio.h @@ -15,8 +15,8 @@ */ /** - * struct spi_gpio_platform_data - parameter for bitbanged SPI master - * @num_chipselect: how many slaves to allow + * struct spi_gpio_platform_data - parameter for bitbanged SPI host controller + * @num_chipselect: how many target devices to allow */ struct spi_gpio_platform_data { u16 num_chipselect; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31d3d747a9db..3fcd20de6ca8 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -456,6 +456,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock) #endif /* CONFIG_PREEMPT_RT */ /* + * Does a critical section need to be broken due to another + * task waiting?: (technically does not depend on CONFIG_PREEMPTION, + * but a general need for low latency) + */ +static inline int spin_needbreak(spinlock_t *lock) +{ +#ifdef CONFIG_PREEMPTION + return spin_is_contended(lock); +#else + return 0; +#endif +} + +/* + * Check if a rwlock is contended. + * Returns non-zero if there is another task waiting on the rwlock. + * Returns zero if the lock is not contended or the system / underlying + * rwlock implementation does not support contention detection. + * Technically does not depend on CONFIG_PREEMPTION, but a general need + * for low latency. + */ +static inline int rwlock_needbreak(rwlock_t *lock) +{ +#ifdef CONFIG_PREEMPTION + return rwlock_is_contended(lock); +#else + return 0; +#endif +} + +/* * Pull the atomic_t declaration: * (asm-mips/atomic.h needs above definitions) */ @@ -507,6 +538,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) @@ -515,23 +548,62 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, + raw_spin_trylock_irqsave(_T->lock, _T->flags)) + DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, + spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, + spin_trylock_irqsave(_T->lock, _T->flags)) + +DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, + read_lock(_T->lock), + read_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, + read_lock_irq(_T->lock), + read_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, + read_lock_irqsave(_T->lock, _T->flags), + read_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, + write_lock(_T->lock), + write_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, + write_lock_irq(_T->lock), + write_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, + write_lock_irqsave(_T->lock, _T->flags), + write_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/splice.h b/include/linux/splice.h index 6c461573434d..9dec4861d09f 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, typedef int (splice_direct_actor)(struct pipe_inode_info *, struct splice_desc *); -extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int, - splice_actor *); -extern ssize_t __splice_from_pipe(struct pipe_inode_info *, - struct splice_desc *, splice_actor *); -extern ssize_t splice_to_pipe(struct pipe_inode_info *, - struct splice_pipe_desc *); -extern ssize_t add_to_pipe(struct pipe_inode_info *, - struct pipe_buffer *); -long vfs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); -extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, - splice_direct_actor *); -extern long do_splice(struct file *in, loff_t *off_in, - struct file *out, loff_t *off_out, - size_t len, unsigned int flags); +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor); +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct splice_desc *sd, splice_actor *actor); +ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf); +ssize_t vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); +ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd, + splice_direct_actor *actor); +ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags); +ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); +ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len); -extern long do_tee(struct file *in, struct file *out, size_t len, - unsigned int flags); -extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags); +static inline long splice_copy_file_range(struct file *in, loff_t pos_in, + struct file *out, loff_t pos_out, + size_t len) +{ + return splice_file_range(in, &pos_in, out, &pos_out, len); +} + +ssize_t do_tee(struct file *in, struct file *out, size_t len, + unsigned int flags); +ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags); /* * for dynamic pipe sizing diff --git a/include/linux/spmi.h b/include/linux/spmi.h index eac1956a8727..28e8c8bd3944 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -120,6 +120,9 @@ static inline void spmi_controller_put(struct spmi_controller *ctrl) int spmi_controller_add(struct spmi_controller *ctrl); void spmi_controller_remove(struct spmi_controller *ctrl); +struct spmi_controller *devm_spmi_controller_alloc(struct device *parent, size_t size); +int devm_spmi_controller_add(struct device *parent, struct spmi_controller *ctrl); + /** * struct spmi_driver - SPMI slave device driver * @driver: SPMI device drivers should initialize name and owner field of @@ -166,7 +169,7 @@ static inline void spmi_driver_unregister(struct spmi_driver *sdrv) struct device_node; -struct spmi_device *spmi_device_from_of(struct device_node *np); +struct spmi_device *spmi_find_device_by_of_node(struct device_node *np); int spmi_register_read(struct spmi_device *sdev, u8 addr, u8 *buf); int spmi_ext_register_read(struct spmi_device *sdev, u8 addr, u8 *buf, size_t len); diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h new file mode 100644 index 000000000000..33dcbec71925 --- /dev/null +++ b/include/linux/sprintf.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KERNEL_SPRINTF_H_ +#define _LINUX_KERNEL_SPRINTF_H_ + +#include <linux/compiler_attributes.h> +#include <linux/types.h> + +int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); + +__printf(2, 3) int sprintf(char *buf, const char * fmt, ...); +__printf(2, 0) int vsprintf(char *buf, const char *, va_list); +__printf(3, 4) int snprintf(char *buf, size_t size, const char *fmt, ...); +__printf(3, 0) int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +__printf(3, 4) int scnprintf(char *buf, size_t size, const char *fmt, ...); +__printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); +__printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...); +__printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +__printf(2, 0) const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); + +__scanf(2, 3) int sscanf(const char *, const char *, ...); +__scanf(2, 0) int vsscanf(const char *, const char *, va_list); + +/* These are for specific cases, do not use without real need */ +extern bool no_hash_pointers; +int no_hash_pointers_enable(char *str); + +#endif /* _LINUX_KERNEL_SPRINTF_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 127ef3b2e607..236610e4a8fa 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); - rcu_lock_acquire(&ssp->dep_map); + rcu_try_lock_acquire(&ssp->dep_map); return retval; } diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index ebd72491af99..447133171d95 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name) +// Dummy structure for srcu_notifier_head. +struct srcu_usage { }; +#define __SRCU_USAGE_INIT(name) { } + void synchronize_srcu(struct srcu_struct *ssp); /* diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index e58306783d8e..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -11,8 +11,6 @@ * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free * stack traces often repeat, using stack depot allows to save about 100x space. * - * Stack traces are never removed from the stack depot. - * * Author: Alexander Potapenko <glider@google.com> * Copyright (C) 2016 Google, Inc. * @@ -32,6 +30,64 @@ typedef u32 depot_stack_handle_t; */ #define STACK_DEPOT_EXTRA_BITS 5 +#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) + +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ +#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ + STACK_DEPOT_EXTRA_BITS) + +#ifdef CONFIG_STACKDEPOT +/* Compact structure that stores a reference to a stack. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; + }; +}; + +struct stack_record { + struct list_head hash_list; /* Links in the hash table */ + u32 hash; /* Hash in hash table */ + u32 size; /* Number of stored frames */ + union handle_parts handle; /* Constant after initialization */ + refcount_t count; + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; +}; +#endif + +typedef u32 depot_flags_t; + +/* + * Flags that can be passed to stack_depot_save_flags(); see the comment next + * to its declaration for more details. + */ +#define STACK_DEPOT_FLAG_CAN_ALLOC ((depot_flags_t)0x0001) +#define STACK_DEPOT_FLAG_GET ((depot_flags_t)0x0002) + +#define STACK_DEPOT_FLAGS_NUM 2 +#define STACK_DEPOT_FLAGS_MASK ((depot_flags_t)((1 << STACK_DEPOT_FLAGS_NUM) - 1)) + /* * Using stack depot requires its initialization, which can be done in 3 ways: * @@ -69,31 +125,39 @@ static inline int stack_depot_early_init(void) { return 0; } #endif /** - * __stack_depot_save - Save a stack trace to stack depot + * stack_depot_save_flags - Save a stack trace to stack depot * * @entries: Pointer to the stack trace * @nr_entries: Number of frames in the stack * @alloc_flags: Allocation GFP flags - * @can_alloc: Allocate stack pools (increased chance of failure if false) + * @depot_flags: Stack depot flags + * + * Saves a stack trace from @entries array of size @nr_entries. + * + * If STACK_DEPOT_FLAG_CAN_ALLOC is set in @depot_flags, stack depot can + * replenish the stack pools in case no space is left (allocates using GFP + * flags of @alloc_flags). Otherwise, stack depot avoids any allocations and + * fails if no space is left to store the stack trace. * - * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, stack depot can replenish the stack pools in case no space is left - * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and fails if no space is left to store the stack trace. + * If STACK_DEPOT_FLAG_GET is set in @depot_flags, stack depot will increment + * the refcount on the saved stack trace if it already exists in stack depot. + * Users of this flag must also call stack_depot_put() when keeping the stack + * trace is no longer required to avoid overflowing the refcount. * * If the provided stack trace comes from the interrupt context, only the part * up to the interrupt entry is saved. * - * Context: Any context, but setting @can_alloc to %false is required if + * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if * alloc_pages() cannot be used from the current context. Currently * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * * Return: Handle of the stack struct stored in depot, 0 on failure */ -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t gfp_flags, bool can_alloc); +depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, + unsigned int nr_entries, + gfp_t gfp_flags, + depot_flags_t depot_flags); /** * stack_depot_save - Save a stack trace to stack depot @@ -102,8 +166,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * @nr_entries: Number of frames in the stack * @alloc_flags: Allocation GFP flags * - * Context: Contexts where allocations via alloc_pages() are allowed. - * See __stack_depot_save() for more details. + * Does not increment the refcount on the saved stack trace; see + * stack_depot_save_flags() for more details. + * + * Context: Contexts where allocations via alloc_pages() are allowed; + * see stack_depot_save_flags() for more details. * * Return: Handle of the stack trace stored in depot, 0 on failure */ @@ -111,6 +178,17 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); /** + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct + * + * @handle: Stack depot handle + * + * This function is only for internal purposes. + * + * Return: Returns a pointer to a stack_record struct + */ +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle); + +/** * stack_depot_fetch - Fetch a stack trace from stack depot * * @handle: Stack depot handle returned from stack_depot_save() @@ -142,6 +220,18 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); /** + * stack_depot_put - Drop a reference to a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * + * The stack trace is evicted from stack depot once all references to it have + * been dropped (once the number of stack_depot_evict() calls matches the + * number of stack_depot_save_flags() calls with STACK_DEPOT_FLAG_GET set for + * this stack trace). + */ +void stack_depot_put(depot_stack_handle_t handle); + +/** * stack_depot_set_extra_bits - Set extra bits in a stack depot handle * * @handle: Stack depot handle returned from stack_depot_save() diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index c36e7a3b45e7..3be2cb564710 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -14,6 +14,7 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include <asm/stacktrace.h> +#include <linux/linkage.h> /* * The lowest address on tsk's stack which we can plausibly erase. @@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h index a9806a44a605..09f994ac87df 100644 --- a/include/linux/start_kernel.h +++ b/include/linux/start_kernel.h @@ -9,7 +9,5 @@ up something else. */ extern asmlinkage void __init __noreturn start_kernel(void); -extern void __init __noreturn arch_call_rest_init(void); -extern void __ref __noreturn rest_init(void); #endif /* _LINUX_START_KERNEL_H */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 06090538fe2d..dfa1828cd756 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -76,6 +76,8 @@ | DMA_AXI_BLEN_32 | DMA_AXI_BLEN_64 \ | DMA_AXI_BLEN_128 | DMA_AXI_BLEN_256) +struct stmmac_priv; + /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { @@ -125,6 +127,7 @@ struct stmmac_est { u32 gcl_unaligned[EST_GCL]; u32 gcl[EST_GCL]; u32 gcl_size; + u32 max_sdu[MTL_MAX_TX_QUEUES]; }; struct stmmac_rxq_cfg { @@ -137,6 +140,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; @@ -172,6 +176,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { @@ -204,14 +209,41 @@ struct dwmac4_addrs { u32 mtl_low_cred_offset; }; +#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0) +#define STMMAC_FLAG_SPH_DISABLE BIT(1) +#define STMMAC_FLAG_USE_PHY_WOL BIT(2) +#define STMMAC_FLAG_HAS_SUN8I BIT(3) +#define STMMAC_FLAG_TSO_EN BIT(4) +#define STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP BIT(5) +#define STMMAC_FLAG_VLAN_FAIL_Q_EN BIT(6) +#define STMMAC_FLAG_MULTI_MSI_EN BIT(7) +#define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8) +#define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) +#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) +#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(12) + struct plat_stmmacenet_data { int bus_id; int phy_addr; - int interface; + /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media + * ^ ^ + * mac_interface phy_interface + * + * mac_interface is the MAC-side interface, which may be the same + * as phy_interface if there is no intervening PCS. If there is a + * PCS, then mac_interface describes the interface mode between the + * MAC and PCS, and phy_interface describes the interface mode + * between the PCS and PHY. + */ + phy_interface_t mac_interface; + /* phy_interface is the PHY-side interface - the interface used by + * an attached PHY. + */ phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *phylink_node; + struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_est *est; @@ -240,12 +272,12 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); void (*speed_mode_2500)(struct net_device *ndev, void *priv); - void (*ptp_clk_freq_config)(void *priv); + void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); @@ -266,22 +298,13 @@ struct plat_stmmacenet_data { struct reset_control *stmmac_ahb_rst; struct stmmac_axi *axi; int has_gmac4; - bool has_sun8i; - bool tso_en; int rss_en; int mac_port_sel_speed; - bool en_tx_lpi_clockgating; - bool rx_clk_runs_in_lpi; int has_xgmac; - bool vlan_fail_q_en; u8 vlan_fail_q; unsigned int eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; - int ext_snapshot_num; - bool int_snapshot_en; - bool ext_snapshot_en; - bool multi_msi_en; int msi_mac_vec; int msi_wol_vec; int msi_lpi_vec; @@ -289,10 +312,7 @@ struct plat_stmmacenet_data { int msi_sfty_ue_vec; int msi_rx_base_vec; int msi_tx_base_vec; - bool use_phy_wol; - bool sph_disable; - bool serdes_up_after_phy_linkup; const struct dwmac4_addrs *dwmac4_addrs; - bool has_integrated_pcs; + unsigned int flags; }; #endif diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..9ba8b4597009 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,10 +2,14 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include <linux/args.h> +#include <linux/array_size.h> #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ +#include <linux/err.h> /* for ERR_PTR() */ #include <linux/errno.h> /* for E2BIG */ +#include <linux/overflow.h> /* for check_mul_overflow() */ #include <linux/stdarg.h> #include <uapi/linux/string.h> @@ -14,6 +18,44 @@ extern void *memdup_user(const void __user *, size_t); extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); +/** + * memdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result is physically + * contiguous, to be freed by kfree(). + */ +static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return memdup_user(src, nbytes); +} + +/** + * vmemdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result may be not + * physically contiguous. Use kvfree() to free. + */ +static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return vmemdup_user(src, nbytes); +} + /* * Include machine specific inline routines */ @@ -25,15 +67,79 @@ extern char * strcpy(char *,const char *); #ifndef __HAVE_ARCH_STRNCPY extern char * strncpy(char *,const char *, __kernel_size_t); #endif -#ifndef __HAVE_ARCH_STRLCPY -size_t strlcpy(char *, const char *, size_t); -#endif -#ifndef __HAVE_ARCH_STRSCPY -ssize_t strscpy(char *, const char *, size_t); -#endif +ssize_t sized_strscpy(char *, const char *, size_t); -/* Wraps calls to strscpy()/memset(), no arch specific code required */ -ssize_t strscpy_pad(char *dest, const char *src, size_t count); +/* + * The 2 argument style can only be used when dst is an array with a + * known size. + */ +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + +#define __strscpy_pad0(dst, src, ...) \ + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + +/** + * strscpy - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer (optional) + * + * Copy the source string @src, or as much of it as fits, into the + * destination @dst buffer. The behavior is undefined if the string + * buffers overlap. The destination @dst buffer is always NUL terminated, + * unless it's zero-sized. + * + * The size argument @... is only required when @dst is not an array, or + * when the copy needs to be smaller than sizeof(@dst). + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @dst (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was + * truncated. + */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + +#define sized_strscpy_pad(dest, src, count) ({ \ + char *__dst = (dest); \ + const char *__src = (src); \ + const size_t __count = (count); \ + ssize_t __wrote; \ + \ + __wrote = sized_strscpy(__dst, __src, __count); \ + if (__wrote >= 0 && __wrote < __count) \ + memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \ + __wrote; \ +}) + +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, the + * remaining bytes in the buffer will be filled with %NUL bytes. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Returns: + * * The number of characters copied (not including the trailing %NULs) + * * -E2BIG if count is 0 or @src was truncated. + */ +#define strscpy_pad(dst, src, ...) \ + CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); @@ -179,10 +285,19 @@ extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); +extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp); +/* lib/argv_split.c */ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +/* lib/cmdline.c */ +extern int get_option(char **str, int *pint); +extern char *get_options(const char *str, int nints, int *ints); +extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); + extern bool sysfs_streq(const char *s1, const char *s2); int match_string(const char * const *array, size_t n, const char *string); int __sysfs_match_string(const char * const *array, size_t n, const char *s); @@ -277,10 +392,12 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem_pad(dest, src, pad) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ + memcpy_and_pad(dest, _dest_len, src, \ + strnlen(src, min(_src_len, _dest_len)), pad); \ } while (0) /** @@ -298,10 +415,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, */ #define strtomem(dest, src) do { \ const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ - memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ + memcpy(dest, src, strnlen(src, min(_src_len, _dest_len))); \ } while (0) /** diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 48120222b9b2..d9ebe20229f8 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -30,6 +30,7 @@ static inline const char *str_read_write(bool v) { return v ? "read" : "write"; } +#define str_write_read(v) str_read_write(!(v)) static inline const char *str_on_off(bool v) { @@ -41,4 +42,15 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +/** + * str_plural - Return the simple pluralization based on English counts + * @num: Number used for deciding pluralization + * + * If @num is 1, returns empty string, otherwise returns "s". + */ +static inline const char *str_plural(size_t num) +{ + return num == 1 ? "" : "s"; +} + #endif diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 789ab30045da..e93fbb5b0c01 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -17,15 +17,19 @@ static inline bool string_is_terminated(const char *s, int len) return memchr(s, '\0', len) ? true : false; } -/* Descriptions of the types of units to - * print in */ +/* Descriptions of the types of units to print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ + STRING_UNITS_MASK = BIT(0), + + /* Modifiers */ + STRING_UNITS_NO_SPACE = BIT(30), + STRING_UNITS_NO_BYTES = BIT(31), }; -void string_get_size(u64 size, u64 blk_size, enum string_size_units units, - char *buf, int len); +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, + char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); @@ -109,6 +113,8 @@ char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); +char *kstrdup_and_replace(const char *src, char old, char new, gfp_t gfp); + char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n); void kfree_strarray(char **array, size_t n); diff --git a/include/linux/stringify.h b/include/linux/stringify.h index 841cec8ed525..0e84cbe65270 100644 --- a/include/linux/stringify.h +++ b/include/linux/stringify.h @@ -9,4 +9,6 @@ #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x) +#define FILE_LINE __FILE__ ":" __stringify(__LINE__) + #endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index db30a159f9d5..f22bf915dcf6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -20,7 +20,8 @@ #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); -void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 518bd28f5ab8..35766963dd14 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -56,10 +56,14 @@ struct cache_head { struct kref ref; unsigned long flags; }; -#define CACHE_VALID 0 /* Entry contains valid data */ -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ + +/* cache_head.flags */ +enum { + CACHE_VALID, /* Entry contains valid data */ + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */ + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/ + CACHE_CLEANED, /* Entry has been cleaned from cache */ +}; #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4f41d839face..5321585c778f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* @@ -138,6 +139,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -148,6 +150,8 @@ struct rpc_create_args { const struct cred *cred; unsigned int max_connect; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct rpc_add_xprt_test { @@ -249,7 +253,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, const char *rpc_proc_name(const struct rpc_task *task); -void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8ada7dc802d3..0c77ba488bba 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -38,6 +38,17 @@ struct rpc_wait { }; /* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; +}; + +/* * This is the RPC task struct */ struct rpc_task { @@ -186,7 +197,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; @@ -205,7 +216,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + struct rpc_timeout *timeout); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index d94d4f410507..3ce1550d1beb 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -43,22 +43,6 @@ struct net; #ifdef CONFIG_PROC_FS int rpc_proc_init(struct net *); void rpc_proc_exit(struct net *); -#else -static inline int rpc_proc_init(struct net *net) -{ - return 0; -} - -static inline void rpc_proc_exit(struct net *net) -{ -} -#endif - -#ifdef MODULE -void rpc_modcount(struct inode *, int); -#endif - -#ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(const struct rpc_program *); @@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} +static inline void rpc_proc_exit(struct net *net) +{ +} static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(const struct rpc_program *p) {} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f8751118c122..23617da0e565 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -17,6 +17,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/svcauth.h> +#include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagevec.h> @@ -33,22 +34,27 @@ */ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_sockets; /* pending sockets */ - unsigned int sp_nrthreads; /* # of threads in pool */ + struct lwq sp_xprts; /* pending transports */ + atomic_t sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + struct llist_head sp_idle_threads; /* idle server threads */ /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; - struct percpu_counter sp_threads_timedout; -#define SP_TASK_PENDING (0) /* still work to do even if no - * xprt is queued. */ -#define SP_CONGESTED (1) unsigned long sp_flags; } ____cacheline_aligned_in_smp; +/* bits for sp_flags */ +enum { + SP_TASK_PENDING, /* still work to do even if no xprt is queued */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ +}; + + /* * RPC service. * @@ -63,7 +69,6 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; - struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -84,54 +89,20 @@ struct svc_serv { int (*sv_threadfn)(void *data); #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head sv_cb_list; /* queue for callback requests + struct lwq sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ - spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/** - * svc_get() - increment reference count on a SUNRPC serv - * @serv: the svc_serv to have count incremented - * - * Returns: the svc_serv that was passed in. - */ -static inline struct svc_serv *svc_get(struct svc_serv *serv) -{ - kref_get(&serv->sv_refcnt); - return serv; -} - -void svc_destroy(struct kref *); +/* This is used by pool_stats to find and lock an svc */ +struct svc_info { + struct svc_serv *serv; + struct mutex *mutex; +}; -/** - * svc_put - decrement reference count on a SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * When the reference count reaches zero, svc_destroy() - * is called to clean up and free the serv. - */ -static inline void svc_put(struct svc_serv *serv) -{ - kref_put(&serv->sv_refcnt, svc_destroy); -} - -/** - * svc_put_not_last - decrement non-final reference count on SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * Returns: %true is refcount was decremented. - * - * If the refcount is 1, it is not decremented and instead failure is reported. - */ -static inline bool svc_put_not_last(struct svc_serv *serv) -{ - return refcount_dec_not_one(&serv->sv_refcnt.refcount); -} +void svc_destroy(struct svc_serv **svcp); /* * Maximum payload size supported by a kernel RPC server. @@ -195,6 +166,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); */ struct svc_rqst { struct list_head rq_all; /* all threads list */ + struct llist_node rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -232,16 +204,6 @@ struct svc_rqst { u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ int rq_cachetype; /* catering to nfsd */ -#define RQ_SECURE (0) /* secure port */ -#define RQ_LOCAL (1) /* local request */ -#define RQ_USEDEFERRAL (2) /* use deferral */ -#define RQ_DROPME (3) /* drop current reply */ -#define RQ_SPLICE_OK (4) /* turned off in gss privacy - * to prevent encrypting page - * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ @@ -265,12 +227,24 @@ struct svc_rqst { /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ - struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + unsigned long bc_to_initval; + unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned int rq_status_counter; /* RPC processing counter */ +}; + +/* bits for rq_flags */ +enum { + RQ_SECURE, /* secure port */ + RQ_LOCAL, /* local request */ + RQ_USEDEFERRAL, /* use deferral */ + RQ_DROPME, /* drop current reply */ + RQ_VICTIM, /* Have agreed to shut down */ + RQ_DATA, /* request has data */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) @@ -308,6 +282,28 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing + */ +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) +{ + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); + + return test_bit(RQ_VICTIM, &rqstp->rq_flags); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; @@ -343,8 +339,7 @@ struct svc_program { const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ - int (*pg_authenticate)(struct svc_rqst *); + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); @@ -415,18 +410,20 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, +struct svc_serv * svc_create_pooled(struct svc_program *prog, + struct svc_stat *stats, + unsigned int bufsize, int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_pool_stats_open(struct svc_serv *serv, struct file *file); +int svc_pool_stats_open(struct svc_info *si, struct file *file); void svc_process(struct svc_rqst *rqstp); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); +void svc_pool_wake_idle_thread(struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a5ee0af2a310..d33bab33099a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -65,6 +65,7 @@ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct workqueue_struct *svcrdma_wq; extern struct percpu_counter svcrdma_stat_read; extern struct percpu_counter svcrdma_stat_recv; @@ -97,6 +98,7 @@ struct svcxprt_rdma { u32 sc_pending_recvs; u32 sc_recv_batch; struct list_head sc_rq_dto_q; + struct list_head sc_read_complete_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; @@ -115,6 +117,13 @@ struct svcxprt_rdma { /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 +static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; + + return container_of(xprt, struct svcxprt_rdma, sc_xprt); +} + /* * Default connection parameters */ @@ -126,6 +135,43 @@ enum { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/** + * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/** + * svc_rdma_send_cid_init - Initialize a Send Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/* + * A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a set of rdma_rw's that handle data movement + * for all segments of one chunk. + */ +struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; + struct ib_cqe cc_cqe; + struct list_head cc_rwctxts; + ktime_t cc_posttime; + int cc_sqecount; +}; + struct svc_rdma_recv_ctxt { struct llist_node rc_node; struct list_head rc_list; @@ -136,26 +182,63 @@ struct svc_rdma_recv_ctxt { void *rc_recv_buf; struct xdr_stream rc_stream; u32 rc_byte_len; - unsigned int rc_page_count; u32 rc_inv_rkey; __be32 rc_msgtype; + /* State for pulling a Read chunk */ + unsigned int rc_pageoff; + unsigned int rc_curpage; + unsigned int rc_readbytes; + struct xdr_buf rc_saved_arg; + struct svc_rdma_chunk_ctxt rc_cc; + struct svc_rdma_pcl rc_call_pcl; struct svc_rdma_pcl rc_read_pcl; struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; struct svc_rdma_pcl rc_reply_pcl; + + unsigned int rc_page_count; + struct page *rc_pages[RPCSVC_MAXPAGES]; +}; + +/* + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; }; struct svc_rdma_send_ctxt { struct llist_node sc_node; struct rpc_rdma_cid sc_cid; + struct work_struct sc_work; + struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -179,13 +262,24 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr); -extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr); +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head); @@ -196,11 +290,12 @@ extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index a6b12631db21..8e20cd60e2e7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -54,25 +54,8 @@ struct svc_xprt { const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; -#define XPT_BUSY 0 /* enqueued/receiving */ -#define XPT_CONN 1 /* conn pending */ -#define XPT_CLOSE 2 /* dead or dying */ -#define XPT_DATA 3 /* data pending */ -#define XPT_TEMP 4 /* connected transport */ -#define XPT_DEAD 6 /* transport closed */ -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ -#define XPT_DEFERRED 8 /* deferred request pending */ -#define XPT_OLD 9 /* used for xprt aging mark+sweep */ -#define XPT_LISTENER 10 /* listening endpoint */ -#define XPT_CACHE_AUTH 11 /* cache auth info */ -#define XPT_LOCAL 12 /* connection from loopback interface */ -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ -#define XPT_CONG_CTRL 14 /* has congestion control */ -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ -#define XPT_TLS_SESSION 16 /* transport-layer security established */ -#define XPT_PEER_AUTH 17 /* peer has been authenticated */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ @@ -97,6 +80,27 @@ struct svc_xprt { struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; +/* flag bits for xpt_flags */ +enum { + XPT_BUSY, /* enqueued/receiving */ + XPT_CONN, /* conn pending */ + XPT_CLOSE, /* dead or dying */ + XPT_DATA, /* data pending */ + XPT_TEMP, /* connected transport */ + XPT_DEAD, /* transport closed */ + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */ + XPT_DEFERRED, /* deferred request pending */ + XPT_OLD, /* used for xprt aging mark+sweep */ + XPT_LISTENER, /* listening endpoint */ + XPT_CACHE_AUTH, /* cache auth info */ + XPT_LOCAL, /* connection from loopback interface */ + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */ + XPT_CONG_CTRL, /* has congestion control */ + XPT_HANDSHAKE, /* xprt requests a handshake */ + XPT_TLS_SESSION, /* transport-layer security established */ + XPT_PEER_AUTH, /* peer has been authenticated */ +}; + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 6d9cc9080aca..61c455f1e1f5 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -83,6 +83,19 @@ struct auth_domain { struct rcu_head rcu_head; }; +enum svc_auth_status { + SVC_GARBAGE = 1, + SVC_SYSERR, + SVC_VALID, + SVC_NEGATIVE, + SVC_OK, + SVC_DROP, + SVC_CLOSE, + SVC_DENIED, + SVC_PENDING, + SVC_COMPLETE, +}; + /* * Each authentication flavour registers an auth_ops * structure. @@ -98,6 +111,8 @@ struct auth_domain { * is (probably) already in place. Certainly space is * reserved for it. * DROP - simply drop the request. It may have been deferred + * CLOSE - like SVC_DROP, but request is definitely lost. + * If there is a tcp connection, it should be closed. * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. @@ -111,60 +126,45 @@ struct auth_domain { * * release() is given a request after the procedure has been run. * It should sign/encrypt the results if needed - * It should return: - * OK - the resbuf is ready to be sent - * DROP - the reply should be quitely dropped - * DENIED - authp holds a reason for MSG_DENIED - * SYSERR - rpc system_err * * domain_release() * This call releases a domain. + * * set_client() - * Givens a pending request (struct svc_rqst), finds and assigns + * Given a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. + * + * pseudoflavor() + * Returns RPC_AUTH pseudoflavor in use by @rqstp. */ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq); - int (*release)(struct svc_rqst *rq); - void (*domain_release)(struct auth_domain *); - int (*set_client)(struct svc_rqst *rq); -}; -#define SVC_GARBAGE 1 -#define SVC_SYSERR 2 -#define SVC_VALID 3 -#define SVC_NEGATIVE 4 -#define SVC_OK 5 -#define SVC_DROP 6 -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely - * lost so if there is a tcp connection, it - * should be closed - */ -#define SVC_DENIED 8 -#define SVC_PENDING 9 -#define SVC_COMPLETE 10 + enum svc_auth_status (*accept)(struct svc_rqst *rqstp); + int (*release)(struct svc_rqst *rqstp); + void (*domain_release)(struct auth_domain *dom); + enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); + rpc_authflavor_t (*pseudoflavor)(struct svc_rqst *rqstp); +}; struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); +extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); -extern int svc_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); -extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); -extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a7116048a4d4..7c78ec6356b9 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -35,8 +35,8 @@ struct svc_sock { /* Total length of the data (not including fragment headers) * received so far in the fragments making up this rpc: */ u32 sk_datalen; - /* Number of queued send requests */ - atomic_t sk_sendqlen; + + struct page_frag_cache sk_frag_cache; struct completion sk_handshake_done; @@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); -int svc_recv(struct svc_rqst *, long); +void svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); @@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net, const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); -void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f89ec4b5ea16..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32); size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf *xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -224,6 +226,7 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ @@ -255,6 +258,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -775,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b52411bcfe4e..81b952649d35 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,17 +30,6 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) -/* - * This describes a timeout strategy - */ -struct rpc_timeout { - unsigned long to_initval, /* initial timeout */ - to_maxval, /* max timeout */ - to_increment; /* if !exponential */ - unsigned int to_retries; /* max # of retries */ - unsigned char to_exponential; -}; - enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, @@ -57,6 +46,7 @@ struct xprt_class; struct seq_file; struct svc_serv; struct net; +#include <linux/lwq.h> /* * This describes a complete RPC request @@ -121,7 +111,7 @@ struct rpc_rqst { int rq_ntrans; #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head rq_bc_list; /* Callback service list */ + struct lwq_node rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ #endif /* CONFIG_SUNRPC_BACKCHANEL */ @@ -162,6 +152,7 @@ struct rpc_xprt_ops { int (*prepare_request)(struct rpc_rqst *req, struct xdr_buf *buf); int (*send_request)(struct rpc_rqst *req); + void (*abort_send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); @@ -351,6 +342,8 @@ struct xprt_create { struct rpc_xprt_switch *bc_xps; unsigned int flags; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h deleted file mode 100644 index 8d3376775813..000000000000 --- a/include/linux/superhyway.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * include/linux/superhyway.h - * - * SuperHyway Bus definitions - * - * Copyright (C) 2004, 2005 Paul Mundt <lethal@linux-sh.org> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#ifndef __LINUX_SUPERHYWAY_H -#define __LINUX_SUPERHYWAY_H - -#include <linux/device.h> - -/* - * SuperHyway IDs - */ -#define SUPERHYWAY_DEVICE_ID_SH5_DMAC 0x0183 - -struct superhyway_vcr_info { - u8 perr_flags; /* P-port Error flags */ - u8 merr_flags; /* Module Error flags */ - u16 mod_vers; /* Module Version */ - u16 mod_id; /* Module ID */ - u8 bot_mb; /* Bottom Memory block */ - u8 top_mb; /* Top Memory block */ -}; - -struct superhyway_ops { - int (*read_vcr)(unsigned long base, struct superhyway_vcr_info *vcr); - int (*write_vcr)(unsigned long base, struct superhyway_vcr_info vcr); -}; - -struct superhyway_bus { - struct superhyway_ops *ops; -}; - -extern struct superhyway_bus superhyway_channels[]; - -struct superhyway_device_id { - unsigned int id; - unsigned long driver_data; -}; - -struct superhyway_device; -extern struct bus_type superhyway_bus_type; - -struct superhyway_driver { - char *name; - - const struct superhyway_device_id *id_table; - struct device_driver drv; - - int (*probe)(struct superhyway_device *dev, const struct superhyway_device_id *id); - void (*remove)(struct superhyway_device *dev); -}; - -#define to_superhyway_driver(d) container_of((d), struct superhyway_driver, drv) - -struct superhyway_device { - char name[32]; - - struct device dev; - - struct superhyway_device_id id; - struct superhyway_driver *drv; - struct superhyway_bus *bus; - - int num_resources; - struct resource *resource; - struct superhyway_vcr_info vcr; -}; - -#define to_superhyway_device(d) container_of((d), struct superhyway_device, dev) - -#define superhyway_get_drvdata(d) dev_get_drvdata(&(d)->dev) -#define superhyway_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) - -static inline int -superhyway_read_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info *vcr) -{ - return dev->bus->ops->read_vcr(base, vcr); -} - -static inline int -superhyway_write_vcr(struct superhyway_device *dev, unsigned long base, - struct superhyway_vcr_info vcr) -{ - return dev->bus->ops->write_vcr(base, vcr); -} - -extern int superhyway_scan_bus(struct superhyway_bus *); - -/* drivers/sh/superhyway/superhyway.c */ -int superhyway_register_driver(struct superhyway_driver *); -void superhyway_unregister_driver(struct superhyway_driver *); -int superhyway_add_device(unsigned long base, struct superhyway_device *, struct superhyway_bus *); -int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices); - -/* drivers/sh/superhyway/superhyway-sysfs.c */ -extern const struct attribute_group *superhyway_dev_groups[]; - -#endif /* __LINUX_SUPERHYWAY_H */ - diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index cb7980805920..5b67f0f47d80 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -44,7 +44,7 @@ struct ssam_event { u8 command_id; u8 instance_id; u16 length; - u8 data[]; + u8 data[] __counted_by(length); }; /** diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 42b249b4c24b..8cd8c38cf3f3 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -193,7 +193,6 @@ struct ssam_device_driver { #ifdef CONFIG_SURFACE_AGGREGATOR_BUS -extern struct bus_type ssam_bus_type; extern const struct device_type ssam_device_type; /** diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h index 5c4ae1a26183..d8dbef6b7fc2 100644 --- a/include/linux/surface_aggregator/serial_hub.h +++ b/include/linux/surface_aggregator/serial_hub.h @@ -12,7 +12,7 @@ #ifndef _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H #define _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H -#include <linux/crc-ccitt.h> +#include <linux/crc-itu-t.h> #include <linux/kref.h> #include <linux/ktime.h> #include <linux/list.h> @@ -188,7 +188,7 @@ static_assert(sizeof(struct ssh_command) == 8); */ static inline u16 ssh_crc(const u8 *buf, size_t len) { - return crc_ccitt_false(0xffff, buf, len); + return crc_itu_t(0xffff, buf, len); } /* diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ef503088942d..da6ebca3ff77 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -40,65 +40,6 @@ typedef int __bitwise suspend_state_t; #define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) -enum suspend_stat_step { - SUSPEND_FREEZE = 1, - SUSPEND_PREPARE, - SUSPEND_SUSPEND, - SUSPEND_SUSPEND_LATE, - SUSPEND_SUSPEND_NOIRQ, - SUSPEND_RESUME_NOIRQ, - SUSPEND_RESUME_EARLY, - SUSPEND_RESUME -}; - -struct suspend_stats { - int success; - int fail; - int failed_freeze; - int failed_prepare; - int failed_suspend; - int failed_suspend_late; - int failed_suspend_noirq; - int failed_resume; - int failed_resume_early; - int failed_resume_noirq; -#define REC_FAILED_NUM 2 - int last_failed_dev; - char failed_devs[REC_FAILED_NUM][40]; - int last_failed_errno; - int errno[REC_FAILED_NUM]; - int last_failed_step; - u64 last_hw_sleep; - u64 total_hw_sleep; - u64 max_hw_sleep; - enum suspend_stat_step failed_steps[REC_FAILED_NUM]; -}; - -extern struct suspend_stats suspend_stats; - -static inline void dpm_save_failed_dev(const char *name) -{ - strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], - name, - sizeof(suspend_stats.failed_devs[0])); - suspend_stats.last_failed_dev++; - suspend_stats.last_failed_dev %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_errno(int err) -{ - suspend_stats.errno[suspend_stats.last_failed_errno] = err; - suspend_stats.last_failed_errno++; - suspend_stats.last_failed_errno %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_step(enum suspend_stat_step step) -{ - suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; - suspend_stats.last_failed_step++; - suspend_stats.last_failed_step %= REC_FAILED_NUM; -} - /** * struct platform_suspend_ops - Callbacks for managing platform dependent * system sleep states. @@ -626,4 +567,19 @@ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ +enum suspend_stat_step { + SUSPEND_WORKING = 0, + SUSPEND_FREEZE, + SUSPEND_PREPARE, + SUSPEND_SUSPEND, + SUSPEND_SUSPEND_LATE, + SUSPEND_SUSPEND_NOIRQ, + SUSPEND_RESUME_NOIRQ, + SUSPEND_RESUME_EARLY, + SUSPEND_RESUME +}; + +void dpm_save_failed_dev(const char *name); +void dpm_save_failed_step(enum suspend_stat_step step); + #endif /* _LINUX_SUSPEND_H */ diff --git a/include/linux/swait.h b/include/linux/swait.h index 6a8c22b8c2a5..d324419482a0 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq) extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); -extern void swake_up_locked(struct swait_queue_head *q); +extern void swake_up_locked(struct swait_queue_head *q, int wake_flags); extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); diff --git a/include/linux/swap.h b/include/linux/swap.h index 456546443f1f..f53d608daa01 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,14 +298,11 @@ struct swap_info_struct { unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ + struct file *bdev_file; /* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ -#ifdef CONFIG_FRONTSWAP - unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ - atomic_t frontswap_pages; /* frontswap pages in-use counter */ -#endif spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, @@ -337,15 +334,13 @@ struct swap_info_struct { */ }; -static inline swp_entry_t folio_swap_entry(struct folio *folio) +static inline swp_entry_t page_swap_entry(struct page *page) { - swp_entry_t entry = { .val = page_private(&folio->page) }; - return entry; -} + struct folio *folio = page_folio(page); + swp_entry_t entry = folio->swap; -static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) -{ - folio->private = (void *)entry.val; + entry.val += folio_page_idx(folio, page); + return entry; } /* linux/mm/workingset.c */ @@ -355,16 +350,6 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); -/* Only track the nodes of mappings with shadow entries */ -void workingset_update_node(struct xa_node *node); -extern struct list_lru shadow_nodes; -#define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ - xas_set_update(xas, workingset_update_node); \ - xas_set_lru(xas, &shadow_nodes); \ - } \ -} while (0) - /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; @@ -402,9 +387,6 @@ void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); -extern void lru_cache_add_inactive_or_unevictable(struct page *page, - struct vm_area_struct *vma); - /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, @@ -455,9 +437,9 @@ static inline unsigned long total_swapcache_pages(void) return global_node_page_state(NR_SWAPCACHE); } -extern void free_swap_cache(struct page *page); -extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct encoded_page **, int); +void free_swap_cache(struct folio *folio); +void free_page_and_swap_cache(struct page *); +void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -495,13 +477,12 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -extern struct swap_info_struct *page_swap_info(struct page *); -extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); +struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); -sector_t swap_page_sector(struct page *page); +sector_t swap_folio_sector(struct folio *folio); static inline void put_swap_device(struct swap_info_struct *si) { @@ -540,7 +521,7 @@ static inline void put_swap_device(struct swap_info_struct *si) /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) -static inline void free_swap_cache(struct page *page) +static inline void free_swap_cache(struct folio *folio) { } @@ -558,6 +539,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } @@ -630,11 +616,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif -#ifdef CONFIG_ZSWAP -extern u64 zswap_pool_total_size; -extern atomic_t zswap_stored_pages; -#endif - #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp); static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 7ed529a77c5b..99e3ed469e88 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -2,11 +2,6 @@ #ifndef _LINUX_SWAPFILE_H #define _LINUX_SWAPFILE_H -/* - * these were static in swapfile.c but frontswap.c needs them and we don't - * want to expose them to the dozens of source files that include swap.h - */ -extern struct swap_info_struct *swap_info[]; extern unsigned long generic_max_swapfile_size(void); unsigned long arch_max_swapfile_size(void); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 4c932cb45e0b..a5c560a2f8c2 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -390,10 +390,44 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) -#define PTE_MARKER_SWAPIN_ERROR BIT(1) +/* + * "Poisoned" here is meant in the very general sense of "future accesses are + * invalid", instead of referring very specifically to hardware memory errors. + * This marker is meant to represent any of various different causes of this. + */ +#define PTE_MARKER_POISONED BIT(1) #define PTE_MARKER_MASK (BIT(2) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) @@ -421,15 +455,15 @@ static inline pte_t make_pte_marker(pte_marker marker) return swp_entry_to_pte(make_pte_marker_entry(marker)); } -static inline swp_entry_t make_swapin_error_entry(void) +static inline swp_entry_t make_poisoned_swp_entry(void) { - return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR); + return make_pte_marker_entry(PTE_MARKER_POISONED); } -static inline int is_swapin_error_entry(swp_entry_t entry) +static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR); + (pte_marker_get(entry) & PTE_MARKER_POISONED); } /* @@ -463,10 +497,24 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) return p; } +static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) +{ + struct folio *folio = pfn_folio(swp_offset_pfn(entry)); + + /* + * Any use of migration entries may only occur while the + * corresponding folio is locked + */ + BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); + + return folio; +} + /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -474,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -543,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -#else - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} -#endif - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4e52cd5e0bdc..ea23097e351f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/limits.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> struct device; struct page; @@ -62,8 +63,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, #ifdef CONFIG_SWIOTLB /** - * struct io_tlb_mem - IO TLB Memory Pool Descriptor - * + * struct io_tlb_pool - IO TLB memory pool descriptor * @start: The start address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. @@ -73,50 +73,125 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool * may be remapped in the memory encrypted case and store virtual * address for bounce buffer operation. - * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and - * @end. For default swiotlb, this is command line adjustable via - * setup_io_tlb_npages. - * @list: The free list describing the number of free entries available - * from each index. - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. + * @nslabs: The number of IO TLB slots between @start and @end. For the + * default swiotlb, this can be adjusted with a boot parameter, + * see setup_io_tlb_npages(). + * @late_alloc: %true if allocated using the page allocator. + * @nareas: Number of areas in the pool. + * @area_nslabs: Number of slots in each area. + * @areas: Array of memory area descriptors. + * @slots: Array of slot descriptors. + * @node: Member of the IO TLB memory pool list. + * @rcu: RCU head for swiotlb_dyn_free(). + * @transient: %true if transient memory pool. + */ +struct io_tlb_pool { + phys_addr_t start; + phys_addr_t end; + void *vaddr; + unsigned long nslabs; + bool late_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct io_tlb_area *areas; + struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head node; + struct rcu_head rcu; + bool transient; +#endif +}; + +/** + * struct io_tlb_mem - Software IO TLB allocator + * @defpool: Default (initial) IO TLB memory pool descriptor. + * @pool: IO TLB memory pool descriptor (if not dynamic). + * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. - * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation - * @nareas: The area number in the pool. - * @area_nslabs: The slot number in the area. + * @can_grow: %true if more pools can be allocated dynamically. + * @phys_limit: Maximum allowed physical address. + * @lock: Lock to synchronize changes to the list. + * @pools: List of IO TLB memory pool descriptors (if dynamic). + * @dyn_alloc: Dynamic IO TLB pool allocation work. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. * @used_hiwater: The high water mark for total_used. Used only for reporting * in debugfs. + * @transient_nslabs: The total number of slots in all transient pools that + * are currently used across all areas. */ struct io_tlb_mem { - phys_addr_t start; - phys_addr_t end; - void *vaddr; + struct io_tlb_pool defpool; unsigned long nslabs; struct dentry *debugfs; - bool late_alloc; bool force_bounce; bool for_alloc; - unsigned int nareas; - unsigned int area_nslabs; - struct io_tlb_area *areas; - struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + bool can_grow; + u64 phys_limit; + spinlock_t lock; + struct list_head pools; + struct work_struct dyn_alloc; +#endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; + atomic_long_t transient_nslabs; #endif }; -extern struct io_tlb_mem io_tlb_default_mem; +#ifdef CONFIG_SWIOTLB_DYNAMIC + +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); + +#else + +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) +{ + return &dev->dma_io_tlb_mem->defpool; +} + +#endif + +/** + * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * @dev: Device which has mapped the buffer. + * @paddr: Physical address within the DMA buffer. + * + * Check if @paddr points into a bounce buffer. + * + * Return: + * * %true if @paddr points into a bounce buffer + * * %false otherwise + */ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->start && paddr < mem->end; + if (!mem) + return false; + +#ifdef CONFIG_SWIOTLB_DYNAMIC + /* + * All SWIOTLB buffer addresses must have been returned by + * swiotlb_tbl_map_single() and passed to a device driver. + * If a SWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in swiotlb_find_slots(). + */ + smp_rmb(); + return READ_ONCE(dev->dma_uses_io_tlb) && + swiotlb_find_pool(dev, paddr); +#else + return paddr >= mem->defpool.start && paddr < mem->defpool.end; +#endif } static inline bool is_swiotlb_force_bounce(struct device *dev) @@ -128,13 +203,22 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); +void swiotlb_dev_init(struct device *dev); size_t swiotlb_max_mapping_size(struct device *dev); +bool is_swiotlb_allocated(void); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); +phys_addr_t default_swiotlb_base(void); +phys_addr_t default_swiotlb_limit(void); #else static inline void swiotlb_init(bool addressing_limited, unsigned int flags) { } + +static inline void swiotlb_dev_init(struct device *dev) +{ +} + static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; @@ -151,6 +235,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev) return SIZE_MAX; } +static inline bool is_swiotlb_allocated(void) +{ + return false; +} + static inline bool is_swiotlb_active(struct device *dev) { return false; @@ -159,6 +248,16 @@ static inline bool is_swiotlb_active(struct device *dev) static inline void swiotlb_adjust_size(unsigned long size) { } + +static inline phys_addr_t default_swiotlb_base(void) +{ + return 0; +} + +static inline phys_addr_t default_swiotlb_limit(void) +{ + return 0; +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 48fabe36509e..8d8fac1626bd 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -41,6 +41,7 @@ enum { enum switchtec_gen { SWITCHTEC_GEN3, SWITCHTEC_GEN4, + SWITCHTEC_GEN5, }; struct mrpc_regs { diff --git a/include/linux/sync_core.h b/include/linux/sync_core.h index 013da4b8b327..67bb9794b875 100644 --- a/include/linux/sync_core.h +++ b/include/linux/sync_core.h @@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void) } #endif -#endif /* _LINUX_SYNC_CORE_H */ +#ifdef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD +#include <asm/sync_core.h> +#else +/* + * This is a dummy prepare_sync_core_cmd() implementation that can be used on + * all architectures which provide unconditional core serializing instructions + * in switch_mm(). + * If your architecture doesn't provide such core serializing instructions in + * switch_mm(), you may need to write your own functions. + */ +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ +} +#endif +#endif /* _LINUX_SYNC_CORE_H */ diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h index 641ca8880995..3858a6ffdd5c 100644 --- a/include/linux/syscall_user_dispatch.h +++ b/include/linux/syscall_user_dispatch.h @@ -6,16 +6,10 @@ #define _SYSCALL_USER_DISPATCH_H #include <linux/thread_info.h> +#include <linux/syscall_user_dispatch_types.h> #ifdef CONFIG_GENERIC_ENTRY -struct syscall_user_dispatch { - char __user *selector; - unsigned long offset; - unsigned long len; - bool on_dispatch; -}; - int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, unsigned long len, char __user *selector); @@ -29,7 +23,6 @@ int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long siz void __user *data); #else -struct syscall_user_dispatch {}; static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, unsigned long len, char __user *selector) diff --git a/include/linux/syscall_user_dispatch_types.h b/include/linux/syscall_user_dispatch_types.h new file mode 100644 index 000000000000..3be36b06c7d7 --- /dev/null +++ b/include/linux/syscall_user_dispatch_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SYSCALL_USER_DISPATCH_TYPES_H +#define _SYSCALL_USER_DISPATCH_TYPES_H + +#include <linux/types.h> + +#ifdef CONFIG_GENERIC_ENTRY + +struct syscall_user_dispatch { + char __user *selector; + unsigned long offset; + unsigned long len; + bool on_dispatch; +}; + +#else + +struct syscall_user_dispatch {}; + +#endif + +#endif /* _SYSCALL_USER_DISPATCH_TYPES_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 03e3d0121d5e..e619ac10cd23 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -71,9 +71,12 @@ struct clone_args; struct open_how; struct mount_attr; struct landlock_ruleset_attr; +struct lsm_ctx; enum landlock_rule_type; struct cachestat_range; struct cachestat; +struct statmount; +struct mnt_id_req; #include <linux/types.h> #include <linux/aio_abi.h> @@ -125,6 +128,7 @@ struct cachestat; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) @@ -284,22 +288,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #endif /* - * Called before coming back to user-mode. Returning to user-mode with an - * address limit different than USER_DS can allow to overwrite kernel memory. - */ -static inline void addr_limit_user_check(void) -{ -#ifdef TIF_FSCHECK - if (!test_thread_flag(TIF_FSCHECK)) - return; -#endif - -#ifdef TIF_FSCHECK - clear_thread_flag(TIF_FSCHECK); -#endif -} - -/* * These syscall function prototypes are kept in the same order as * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, * followed by deprecated or obsolete system calls. @@ -371,7 +359,6 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, @@ -424,6 +411,12 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz, asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf); +asmlinkage long sys_statmount(const struct mnt_id_req __user *req, + struct statmount __user *buf, size_t bufsize, + unsigned int flags); +asmlinkage long sys_listmount(const struct mnt_id_req __user *req, + u64 __user *mnt_ids, size_t nr_mnt_ids, + unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); #if BITS_PER_LONG == 32 @@ -438,8 +431,10 @@ asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_chroot(const char __user *filename); asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); -asmlinkage long sys_fchmodat(int dfd, const char __user * filename, +asmlinkage long sys_fchmodat(int dfd, const char __user *filename, umode_t mode); +asmlinkage long sys_fchmodat2(int dfd, const char __user *filename, + umode_t mode, unsigned int flags); asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); @@ -563,6 +558,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, @@ -953,6 +958,12 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); +asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + u32 *size, u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + u32 size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); /* * Architecture-specific system calls diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 59d451f455bf..ee7d33b89e9e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -159,12 +159,22 @@ struct ctl_node { struct ctl_table_header *header; }; -/* struct ctl_table_header is used to maintain dynamic lists of - struct ctl_table trees. */ +/** + * struct ctl_table_header - maintains dynamic lists of struct ctl_table trees + * @ctl_table: pointer to the first element in ctl_table array + * @ctl_table_size: number of elements pointed by @ctl_table + * @used: The entry will never be touched when equal to 0. + * @count: Upped every time something is added to @inodes and downed every time + * something is removed from inodes + * @nreg: When nreg drops to 0 the ctl_table_header will be unregistered. + * @rcu: Delays the freeing of the inode. Introduced with "unfuck proc_sysctl ->d_compare()" + * + */ struct ctl_table_header { union { struct { struct ctl_table *ctl_table; + int ctl_table_size; int used; int count; int nreg; @@ -200,10 +210,8 @@ struct ctl_table_root { int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; -/* struct ctl_path describes where in the hierarchy a table is added */ -struct ctl_path { - const char *procname; -}; +#define register_sysctl(path, table) \ + register_sysctl_sz(path, table, ARRAY_SIZE(table)) #ifdef CONFIG_SYSCTL @@ -216,17 +224,20 @@ extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table); -struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); + const char *path, struct ctl_table *table, size_t table_size); +struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, + size_t table_size); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init_bases(void); extern void __register_sysctl_init(const char *path, struct ctl_table *table, - const char *table_name); -#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) + const char *table_name, size_t table_size); +#define register_sysctl_init(path, table) \ + __register_sysctl_init(path, table, #table, ARRAY_SIZE(table)) extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); +bool sysctl_is_alias(char *param); int do_proc_douintvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -239,8 +250,6 @@ extern int unaligned_enabled; extern int unaligned_dump_stack; extern int no_unaligned_warning; -#define SYSCTL_PERM_EMPTY_DIR (1 << 0) - #else /* CONFIG_SYSCTL */ static inline void register_sysctl_init(const char *path, struct ctl_table *table) @@ -252,7 +261,9 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p return NULL; } -static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +static inline struct ctl_table_header *register_sysctl_sz(const char *path, + struct ctl_table *table, + size_t table_size) { return NULL; } @@ -270,6 +281,11 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, static inline void do_sysctl_args(void) { } + +static inline bool sysctl_is_alias(char *param) +{ + return false; +} #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index c1ef5fc60a3c..c9cb657dad08 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -9,7 +9,8 @@ #include <linux/kernel.h> #include <linux/platform_data/simplefb.h> -#include <linux/screen_info.h> + +struct screen_info; enum { M_I17, /* 17-Inch iMac */ @@ -90,7 +91,8 @@ static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); + const struct simplefb_platform_data *mode, + struct device *parent); #else /* CONFIG_SYSFB_SIMPLE */ @@ -101,7 +103,8 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, } static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) + const struct simplefb_platform_data *mode, + struct device *parent) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd3fe5c8c17f..326341c62385 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -61,22 +61,32 @@ do { \ /** * struct attribute_group - data structure used to declare an attribute group. * @name: Optional: Attribute group name - * If specified, the attribute group will be created in - * a new subdirectory with this name. + * If specified, the attribute group will be created in a + * new subdirectory with this name. Additionally when a + * group is named, @is_visible and @is_bin_visible may + * return SYSFS_GROUP_INVISIBLE to control visibility of + * the directory itself. * @is_visible: Optional: Function to return permissions associated with an - * attribute of the group. Will be called repeatedly for each - * non-binary attribute in the group. Only read/write + * attribute of the group. Will be called repeatedly for + * each non-binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if an attribute is not visible. The returned value - * will replace static permissions defined in struct attribute. + * return 0 if an attribute is not visible. The returned + * value will replace static permissions defined in struct + * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this + * callback to specify separate _group_visible() and + * _attr_visible() handlers. * @is_bin_visible: * Optional: Function to return permissions associated with a * binary attribute of the group. Will be called repeatedly * for each binary attribute in the group. Only read/write - * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if a binary attribute is not visible. The returned - * value will replace static permissions defined in - * struct bin_attribute. + * permissions as well as SYSFS_PREALLOC (and the + * visibility flags for named groups) are accepted. Must + * return 0 if a binary attribute is not visible. The + * returned value will replace static permissions defined + * in struct bin_attribute. If @is_visible is not set, Use + * SYSFS_GROUP_VISIBLE() when assigning this callback to + * specify separate _group_visible() and _attr_visible() + * handlers. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -91,13 +101,121 @@ struct attribute_group { struct bin_attribute **bin_attrs; }; +#define SYSFS_PREALLOC 010000 +#define SYSFS_GROUP_INVISIBLE 020000 + +/* + * DEFINE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with the assignment of ".is_visible = + * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory + * associated with a named attribute_group to optionally be hidden. + * This allows for static declaration of attribute_groups, and the + * simplification of attribute visibility lifetime that implies, + * without polluting sysfs with empty attribute directories. + * Ex. + * + * static umode_t example_attr_visible(struct kobject *kobj, + * struct attribute *attr, int n) + * { + * if (example_attr_condition) + * return 0; + * else if (ro_attr_condition) + * return 0444; + * return a->mode; + * } + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + * + * Note that it expects <name>_attr_visible and <name>_group_visible to + * be defined. For cases where individual attributes do not need + * separate visibility consideration, only entire group visibility at + * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(). + */ +#define DEFINE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +/* + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like + * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does + * not require the implementation of a per-attribute visibility + * callback. + * Ex. + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + */ +#define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +/* + * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary + * attributes. If an attribute_group defines both text and binary + * attributes, the group visibility is determined by the function + * specified to is_visible() not is_bin_visible() + */ +#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +#define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn + /* * Use these macros to make defining attributes easier. * See include/linux/device.h for examples.. */ -#define SYSFS_PREALLOC 010000 - #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ @@ -181,6 +299,8 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 3a582ec7a2f1..bdca467ebb77 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -30,7 +30,7 @@ #define SYSRQ_ENABLE_RTNICE 0x0100 struct sysrq_key_op { - void (* const handler)(int); + void (* const handler)(u8); const char * const help_msg; const char * const action_msg; const int enable_mask; @@ -43,10 +43,10 @@ struct sysrq_key_op { * are available -- else NULL's). */ -void handle_sysrq(int key); -void __handle_sysrq(int key, bool check_mask); -int register_sysrq_key(int key, const struct sysrq_key_op *op); -int unregister_sysrq_key(int key, const struct sysrq_key_op *op); +void handle_sysrq(u8 key); +void __handle_sysrq(u8 key, bool check_mask); +int register_sysrq_key(u8 key, const struct sysrq_key_op *op); +int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op); extern const struct sysrq_key_op *__sysrq_reboot_op; int sysrq_toggle_support(int enable_mask); @@ -54,20 +54,20 @@ int sysrq_mask(void); #else -static inline void handle_sysrq(int key) +static inline void handle_sysrq(u8 key) { } -static inline void __handle_sysrq(int key, bool check_mask) +static inline void __handle_sysrq(u8 key, bool check_mask) { } -static inline int register_sysrq_key(int key, const struct sysrq_key_op *op) +static inline int register_sysrq_key(u8 key, const struct sysrq_key_op *op) { return -EINVAL; } -static inline int unregister_sysrq_key(int key, const struct sysrq_key_op *op) +static inline int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op) { return -EINVAL; } diff --git a/include/linux/tc.h b/include/linux/tc.h index a60639f37963..1638660abf5e 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -120,7 +120,7 @@ static inline unsigned long tc_get_speed(struct tc_bus *tbus) #ifdef CONFIG_TC -extern struct bus_type tc_bus_type; +extern const struct bus_type tc_bus_type; extern int tc_register_driver(struct tc_driver *tdrv); extern void tc_unregister_driver(struct tc_driver *tdrv); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 91a37c99ba66..55399ee2a57e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,6 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; + bool req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif @@ -165,6 +166,11 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; +#ifdef CONFIG_TCP_AO + u8 ao_keyid; + u8 ao_rcv_next; + bool used_tcp_ao; +#endif }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -172,24 +178,135 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +static inline bool tcp_rsk_used_ao(const struct request_sock *req) +{ +#ifndef CONFIG_TCP_AO + return false; +#else + return tcp_rsk(req)->used_tcp_ao; +#endif +} + +#define TCP_RMEM_TO_WIN_SCALE 8 + struct tcp_sock { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/tcp_sock.rst. + * Please update the document when adding new fields. + */ + /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; - u16 tcp_header_len; /* Bytes of tcp header to send */ + + /* TX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_tx); + /* timestamp of last sent data packet (for restart window) */ + u32 max_window; /* Maximal window ever seen from peer */ + u32 rcv_ssthresh; /* Current window clamp */ + u32 reordering; /* Packet reordering metric. */ + u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u16 gso_segs; /* Max number of segs per GSO packet */ + /* from STCP, retrans queue hinting */ + struct sk_buff *lost_skb_hint; + struct sk_buff *retransmit_skb_hint; + __cacheline_group_end(tcp_sock_read_tx); + + /* TXRX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_txrx); + u32 tsoffset; /* timestamp offset */ + u32 snd_wnd; /* The window we expect to receive */ + u32 mss_cache; /* Cached effective mss, not including SACKS */ + u32 snd_cwnd; /* Sending congestion window */ + u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 lost_out; /* Lost packets */ + u32 sacked_out; /* SACK'd packets */ + u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ + u8 chrono_type : 2, /* current chronograph type */ + repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + __cacheline_group_end(tcp_sock_read_txrx); + + /* RX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_rx); + u32 copied_seq; /* Head of yet unread data */ + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ + u32 snd_wl1; /* Sequence for window update */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 retrans_out; /* Retransmitted packets out */ + u16 advmss; /* Advertised MSS */ + u16 urg_data; /* Saved octet of OOB data and control flags */ + u32 lost; /* Total data packets lost incl. rexmits */ + struct minmax rtt_min; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + u32 snd_ssthresh; /* Slow start size threshold */ + __cacheline_group_end(tcp_sock_read_rx); + /* TX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned; + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut + * total number of data bytes sent. + */ + u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ + u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + u32 lsndtime; + u32 mdev_us; /* medium deviation */ + u32 rtt_seq; /* sequence number to update rttvar */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ + u64 tcp_mstamp; /* most recent packet received/sent */ + struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ + struct sk_buff *highest_sack; /* skb just after the highest + * skb with SACKed bit set + * (validity guaranteed only if + * sacked_out > 0) + */ + u8 ecn_flags; /* ECN status bits. */ + __cacheline_group_end(tcp_sock_write_tx); + + /* TXRX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_txrx); /* * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ __be32 pred_flags; - + u32 rcv_nxt; /* What we want to receive next */ + u32 snd_nxt; /* Next sequence we send */ + u32 snd_una; /* First byte we want an ack for */ + u32 window_clamp; /* Maximal window to advertise */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 packets_out; /* Packets which are "in flight" */ + u32 snd_up; /* Urgent pointer */ + u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ + u32 app_limited; /* limited until "delivered" reaches this val */ + u32 rcv_wnd; /* Current receiver window */ /* - * RFC793 variables by their proper names. This means you can - * read the code and the spec side by side (and laugh ...) - * See RFC793 and RFC1122. The RFC writes these in capitals. + * Options received (usually on last packet, some only on SYN packets). */ - u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + struct tcp_options_received rx_opt; + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + __cacheline_group_end(tcp_sock_write_txrx); + + /* RX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); + u64 bytes_received; + /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes * were acked. */ @@ -199,45 +316,42 @@ struct tcp_sock { u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn * total number of data segments in. */ - u32 rcv_nxt; /* What we want to receive next */ - u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ - u32 snd_nxt; /* Next sequence we send */ - u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut - * The total number of segments sent. - */ - u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut - * total number of data segments sent. - */ - u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut - * total number of data bytes sent. - */ + u32 max_packets_out; /* max packets_out in last window */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ + u32 rcv_rtt_last_tsecr; + u64 first_tx_mstamp; /* start of window send phase */ + u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. */ + struct { + u32 rtt_us; + u32 seq; + u64 time; + } rcv_rtt_est; +/* Receiver queue space */ + struct { + u32 space; + u32 seq; + u64 time; + } rcvq_space; + __cacheline_group_end(tcp_sock_write_rx); + /* End of Hot Path */ + +/* + * RFC793 variables by their proper names. This means you can + * read the code and the spec side by side (and laugh ...) + * See RFC793 and RFC1122. The RFC writes these in capitals. + */ u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ - u32 snd_una; /* First byte we want an ack for */ - u32 snd_sml; /* Last byte of the most recently transmitted small packet */ - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ - u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; - - u32 tsoffset; /* timestamp offset */ - struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ - - u32 snd_wl1; /* Sequence for window update */ - u32 snd_wnd; /* The window we expect to receive */ - u32 max_window; /* Maximal window ever seen from peer */ - u32 mss_cache; /* Cached effective mss, not including SACKS */ - - u32 window_clamp; /* Maximal window to advertise */ - u32 rcv_ssthresh; /* Current window clamp */ /* Information of the most recently (s)acked skb */ struct tcp_rack { @@ -251,23 +365,15 @@ struct tcp_sock { dsack_seen:1, /* Whether DSACK seen after last adj */ advanced:1; /* mstamp advanced since last lost marking */ } rack; - u16 advmss; /* Advertised MSS */ u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ unused:5; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ - rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + u8 thin_lto : 1,/* Use linear timeouts for thin streams */ + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - fastopen_client_fail:2; /* reason why fastopen failed */ - u8 nonagle : 4,/* Disable Nagle algorithm? */ - thin_lto : 1,/* Use linear timeouts for thin streams */ - recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ - repair : 1, + fastopen_client_fail:2, /* reason why fastopen failed */ frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 save_syn:2, /* Save headers of SYN packet */ @@ -275,45 +381,19 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ - u64 tcp_wstamp_ns; /* departure time for next sent data packet */ - u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ - u64 tcp_mstamp; /* most recent packet received/sent */ - u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u32 rttvar_us; /* smoothed mdev_max */ - u32 rtt_seq; /* sequence number to update rttvar */ - struct minmax rtt_min; - - u32 packets_out; /* Packets which are "in flight" */ - u32 retrans_out; /* Retransmitted packets out */ - u32 max_packets_out; /* max packets_out in last window */ - u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ - u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 ecn_flags; /* ECN status bits. */ - u8 keepalive_probes; /* num of allowed keep alive probes */ - u32 reordering; /* Packet reordering metric. */ u32 reord_seen; /* number of data packet reordering events */ - u32 snd_up; /* Urgent pointer */ - -/* - * Options received (usually on last packet, some only on SYN packets). - */ - struct tcp_options_received rx_opt; /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ - u32 snd_ssthresh; /* Slow start size threshold */ - u32 snd_cwnd; /* Sending congestion window */ u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; @@ -321,32 +401,11 @@ struct tcp_sock { u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ - u32 prr_out; /* Total number of pkts sent during Recovery. */ - u32 delivered; /* Total data packets delivered incl. rexmits */ - u32 delivered_ce; /* Like the above but only ECE marked packets */ - u32 lost; /* Total data packets lost incl. rexmits */ - u32 app_limited; /* limited until "delivered" reaches this val */ - u64 first_tx_mstamp; /* start of window send phase */ - u64 delivered_mstamp; /* time we reached "delivered" */ - u32 rate_delivered; /* saved rate sample: packets delivered */ - u32 rate_interval_us; /* saved rate sample: time elapsed */ - - u32 rcv_wnd; /* Current receiver window */ - u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ - u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ - u32 pushed_seq; /* Last pushed seq, required to talk to windows */ - u32 lost_out; /* Lost packets */ - u32 sacked_out; /* SACK'd packets */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; - /* from STCP, retrans queue hinting */ - struct sk_buff* lost_skb_hint; - struct sk_buff *retransmit_skb_hint; - - /* OOO segments go in this rbtree. Socket lock must be held. */ - struct rb_root out_of_order_queue; struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ @@ -355,12 +414,6 @@ struct tcp_sock { struct tcp_sack_block recv_sack_cache[4]; - struct sk_buff *highest_sack; /* skb just after the highest - * skb with SACKed bit set - * (validity guaranteed only if - * sacked_out > 0) - */ - int lost_cnt_hint; u32 prior_ssthresh; /* ssthresh saved at recovery start */ @@ -375,6 +428,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -403,21 +464,6 @@ struct tcp_sock { u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ -/* Receiver side RTT estimation */ - u32 rcv_rtt_last_tsecr; - struct { - u32 rtt_us; - u32 seq; - u64 time; - } rcv_rtt_est; - -/* Receiver queue space */ - struct { - u32 space; - u32 seq; - u64 time; - } rcvq_space; - /* TCP-specific MTU probe information. */ struct { u32 probe_seq_start; @@ -431,17 +477,22 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) - bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ + bool (*smc_hs_congested)(const struct sock *sk); #endif -#ifdef CONFIG_TCP_MD5SIG -/* TCP AF-Specific parts; only used by MD5 Signature support so far */ +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; +#ifdef CONFIG_TCP_MD5SIG /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif +#endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; @@ -461,15 +512,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) @@ -495,6 +548,9 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) @@ -562,6 +618,11 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); -void tcp_sock_set_user_timeout(struct sock *sk, u32 val); +int tcp_sock_set_user_timeout(struct sock *sk, int val); + +static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) +{ + return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS); +} #endif /* _LINUX_TCP_H */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 17eb1c5205d3..71632e3c5f18 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -84,6 +84,7 @@ struct tee_param { * @release: release this open file * @open_session: open a new session * @close_session: close a session + * @system_session: declare session as a system session * @invoke_func: invoke a trusted function * @cancel_req: request cancel of an ongoing invoke or open * @supp_recv: called for supplicant to get a command @@ -100,6 +101,7 @@ struct tee_driver_ops { struct tee_ioctl_open_session_arg *arg, struct tee_param *param); int (*close_session)(struct tee_context *ctx, u32 session); + int (*system_session)(struct tee_context *ctx, u32 session); int (*invoke_func)(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, struct tee_param *param); @@ -430,6 +432,20 @@ int tee_client_open_session(struct tee_context *ctx, int tee_client_close_session(struct tee_context *ctx, u32 session); /** + * tee_client_system_session() - Declare session as a system session + * @ctx: TEE Context + * @session: Session id + * + * This function requests TEE to provision an entry context ready to use for + * that session only. The provisioned entry context is used for command + * invocation and session closure, not for command cancelling requests. + * TEE releases the provisioned context upon session closure. + * + * Return < 0 on error else 0 if an entry context has been provisioned. + */ +int tee_client_system_session(struct tee_context *ctx, u32 session); + +/** * tee_client_invoke_func() - Invoke a function in a Trusted Application * @ctx: TEE Context * @arg: Invoke arguments, see description of @@ -466,7 +482,7 @@ static inline bool tee_param_is_memref(struct tee_param *param) } } -extern struct bus_type tee_bus_type; +extern const struct bus_type tee_bus_type; /** * struct tee_client_device - tee based device diff --git a/include/linux/thermal.h b/include/linux/thermal.h index dee66ade89a0..c33f50177f51 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,6 +32,7 @@ struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; +struct thermal_debugfs; struct thermal_attr; enum thermal_trend { @@ -51,8 +52,35 @@ enum thermal_notify_event { THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */ THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ + THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ + THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ + THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @threshold: trip crossing notification threshold miliCelsius + * @type: trip point type + * @priv: pointer to driver data associated with this trip + * @flags: flags representing binary properties of the trip + */ +struct thermal_trip { + int temperature; + int hysteresis; + int threshold; + enum thermal_trip_type type; + u8 flags; + void *priv; +}; + +#define THERMAL_TRIP_FLAG_RW_TEMP BIT(0) +#define THERMAL_TRIP_FLAG_RW_HYST BIT(1) + +#define THERMAL_TRIP_FLAG_RW (THERMAL_TRIP_FLAG_RW_TEMP | \ + THERMAL_TRIP_FLAG_RW_HYST) + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -62,32 +90,15 @@ struct thermal_zone_device_ops { int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); - int (*get_trip_type) (struct thermal_zone_device *, int, - enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, int *); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); - int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, int, - enum thermal_trend *); + int (*get_trend) (struct thermal_zone_device *, + const struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ -struct thermal_trip { - int temperature; - int hysteresis; - enum thermal_trip_type type; -}; - struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); @@ -99,7 +110,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; - char *type; + const char *type; unsigned long max_state; struct device device; struct device_node *np; @@ -110,6 +121,9 @@ struct thermal_cooling_device { struct mutex lock; /* protect thermal_instances list */ struct list_head thermal_instances; struct list_head node; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif }; /** @@ -117,14 +131,13 @@ struct thermal_cooling_device { * @id: unique id number for each thermal zone * @type: the thermal zone device type * @device: &struct device for this thermal zone + * @removal: removal completion * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature * @trip_type_attrs: attributes for trip points for sysfs: trip type * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @mode: current mode of this thermal zone * @devdata: private pointer for device private data - * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports - * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when @@ -152,20 +165,21 @@ struct thermal_cooling_device { * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event + * @suspended: thermal zone suspend indicator + * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; + struct completion removal; struct attribute_group trips_attribute_group; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; struct thermal_attr *trip_hyst_attrs; enum thermal_device_mode mode; void *devdata; - struct thermal_trip *trips; int num_trips; - unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; int temperature; @@ -175,7 +189,7 @@ struct thermal_zone_device { int prev_low_trip; int prev_high_trip; atomic_t need_update; - struct thermal_zone_device_ops *ops; + struct thermal_zone_device_ops ops; struct thermal_zone_params *tzp; struct thermal_governor *governor; void *governor_data; @@ -185,6 +199,11 @@ struct thermal_zone_device { struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; + bool suspended; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif + struct thermal_trip trips[] __counted_by(num_trips); }; /** @@ -197,19 +216,24 @@ struct thermal_zone_device { * thermal zone. * @throttle: callback called for every trip point even if temperature is * below the trip point temperature + * @update_tz: callback called when thermal zone internals have changed, e.g. + * thermal cooling instance was added/removed * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { - char name[THERMAL_NAME_LENGTH]; + const char *name; int (*bind_to_tz)(struct thermal_zone_device *tz); void (*unbind_from_tz)(struct thermal_zone_device *tz); - int (*throttle)(struct thermal_zone_device *tz, int trip); + int (*throttle)(struct thermal_zone_device *tz, + const struct thermal_trip *trip); + void (*update_tz)(struct thermal_zone_device *tz, + enum thermal_notify_event reason); struct list_head governor_list; }; /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { - char governor_name[THERMAL_NAME_LENGTH]; + const char *governor_name; /* * a boolean to indicate if the thermal to hwmon sysfs interface @@ -283,42 +307,52 @@ int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); - -int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, - const struct thermal_trip *trip); - +int for_each_thermal_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); int thermal_zone_get_num_trips(struct thermal_zone_device *tz); +void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, + struct thermal_trip *trip, int temp); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); -#ifdef CONFIG_THERMAL_ACPI -int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); -int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); -int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); -int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); -#endif - #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); - -void thermal_zone_device_unregister(struct thermal_zone_device *); - -struct thermal_zone_device * -thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); +struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp); + +void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, @@ -345,15 +379,25 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else -static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) +static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } -static inline void thermal_zone_device_unregister( - struct thermal_zone_device *tz) + +static inline struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp) +{ return ERR_PTR(-ENODEV); } + +static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 02333f47c994..4338ea9ac4fd 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -86,9 +86,9 @@ struct tb { unsigned long privdata[]; }; -extern struct bus_type tb_bus_type; -extern struct device_type tb_service_type; -extern struct device_type tb_xdomain_type; +extern const struct bus_type tb_bus_type; +extern const struct device_type tb_service_type; +extern const struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 @@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * enum tb_link_width - Thunderbolt/USB4 link width * @TB_LINK_WIDTH_SINGLE: Single lane link * @TB_LINK_WIDTH_DUAL: Dual lane symmetric link - * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters + * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters * @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers */ enum tb_link_width { diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 44a7f9169ac6..10642d4844f0 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -271,7 +271,7 @@ long st_kim_stop(void *); void st_kim_complete(void *); void kim_st_list_protocols(struct st_data_s *, void *); -void st_kim_recv(void *, const unsigned char *, long); +void st_kim_recv(void *disc_data, const u8 *data, size_t count); /* diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..4924a33700b7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/sched.h> #include <linux/rcupdate.h> +#include <linux/static_key.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); @@ -19,16 +20,22 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_handover_do_timer(void) { } static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) +extern int tick_cpu_dying(unsigned int cpu); +extern void tick_assert_timekeeping_handover(void); +#else +#define tick_cpu_dying NULL +static inline void tick_assert_timekeeping_handover(void) { } +#endif + #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) extern void tick_freeze(void); extern void tick_unfreeze(void); @@ -63,18 +70,14 @@ enum tick_broadcast_state { TICK_BROADCAST_ENTER, }; +extern struct static_key_false arch_needs_tick_broadcast; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) -extern void tick_offline_cpu(unsigned int cpu); -#else -static inline void tick_offline_cpu(unsigned int cpu) { } -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else @@ -140,14 +143,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,13 +165,18 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ +/* + * Mask of CPUs that are nohz_full. + * + * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu() + * check. + */ +extern cpumask_var_t tick_nohz_full_mask; + #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 03d9c5ac01d1..876e31b4461d 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -7,10 +7,13 @@ #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/err.h> +#include <linux/time64.h> struct user_namespace; extern struct user_namespace init_user_ns; +struct vm_area_struct; + struct timens_offsets { struct timespec64 monotonic; struct timespec64 boottime; diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. + * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fe1e467ba046..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -4,6 +4,7 @@ #include <linux/errno.h> #include <linux/clocksource_ids.h> +#include <linux/ktime.h> /* Included from linux/ktime.h */ @@ -21,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. @@ -73,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -85,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. + * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -101,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -143,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -223,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -241,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { @@ -267,15 +303,17 @@ struct system_device_crosststamp { }; /** - * struct system_counterval_t - system counter value with the pointer to the + * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value - * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * @cs_id: Clocksource ID corresponding to system counter value. Used by + * timekeeping code to verify comparability of two cycle values. + * The default ID, CSID_GENERIC, does not identify a specific + * clocksource. */ struct system_counterval_t { u64 cycles; - struct clocksource *cs; + enum clocksource_ids cs_id; }; /* diff --git a/include/linux/timer.h b/include/linux/timer.h index 9162f275819a..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -7,21 +7,7 @@ #include <linux/stddef.h> #include <linux/debugobjects.h> #include <linux/stringify.h> - -struct timer_list { - /* - * All fields that change during normal runtime grouped to the - * same cacheline - */ - struct hlist_node entry; - unsigned long expires; - void (*function)(struct timer_list *); - u32 flags; - -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; +#include <linux/timer_types.h> #ifdef CONFIG_LOCKDEP /* @@ -36,7 +22,7 @@ struct timer_list { #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -50,16 +36,10 @@ struct timer_list { * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! * - * @TIMER_PINNED: A pinned timer will not be affected by any timer - * placement heuristics (like, NOHZ) and will always expire on the CPU - * on which the timer was enqueued. - * - * Note: Because enqueuing of timers can migrate the timer from one - * CPU to another, pinned timers are not guaranteed to stay on the - * initialy selected CPU. They move to the CPU on which the enqueue - * function is invoked via mod_timer() or add_timer(). If the timer - * should be placed on a particular CPU, then add_timer_on() has to be - * used. + * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the + * timer was enqueued. When a particular CPU is required, add_timer_on() + * has to be used. Enqueue via mod_timer() and add_timer() is always done + * on the local CPU. */ #define TIMER_CPUMASK 0x0003FFFF #define TIMER_MIGRATING 0x00040000 @@ -77,8 +57,7 @@ struct timer_list { .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ + __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \ } #define DEFINE_TIMER(_name, _function) \ @@ -161,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -180,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); +extern void add_timer_local(struct timer_list *timer); +extern void add_timer_global(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); @@ -194,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -207,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { diff --git a/include/linux/timer_types.h b/include/linux/timer_types.h new file mode 100644 index 000000000000..fae5a388f914 --- /dev/null +++ b/include/linux/timer_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TIMER_TYPES_H +#define _LINUX_TIMER_TYPES_H + +#include <linux/lockdep_types.h> +#include <linux/types.h> + +struct timer_list { + /* + * All fields that change during normal runtime grouped to the + * same cacheline + */ + struct hlist_node entry; + unsigned long expires; + void (*function)(struct timer_list *); + u32 flags; + +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif +}; + +#endif /* _LINUX_TIMER_TYPES_H */ diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index adc80e29168e..62973f7d4610 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -3,18 +3,7 @@ #define _LINUX_TIMERQUEUE_H #include <linux/rbtree.h> -#include <linux/ktime.h> - - -struct timerqueue_node { - struct rb_node node; - ktime_t expires; -}; - -struct timerqueue_head { - struct rb_root_cached rb_root; -}; - +#include <linux/timerqueue_types.h> extern bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node); diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h new file mode 100644 index 000000000000..dc298d0923e3 --- /dev/null +++ b/include/linux/timerqueue_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TIMERQUEUE_TYPES_H +#define _LINUX_TIMERQUEUE_TYPES_H + +#include <linux/rbtree_types.h> +#include <linux/types.h> + +struct timerqueue_node { + struct rb_node node; + ktime_t expires; +}; + +struct timerqueue_head { + struct rb_root_cached rb_root; +}; + +#endif /* _LINUX_TIMERQUEUE_TYPES_H */ diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 1c3948a1d6ad..3c13240077b8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a); struct tnum tnum_subreg(struct tnum a); /* Returns the tnum with the lower 32-bit subreg cleared */ struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower + * 32-bit subreg in *subreg* + */ +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg); /* Returns the tnum with the lower 32-bit subreg set to value */ struct tnum tnum_const_subreg(struct tnum a, u32 value); /* Returns true if 32-bit subreg @a is a known constant*/ diff --git a/include/linux/topology.h b/include/linux/topology.h index fea32377f7c7..52f5850730b3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { - return cpumask_nth(cpu, cpus); + return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * diff --git a/include/linux/torture.h b/include/linux/torture.h index 7038104463e4..1541454da03e 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -21,6 +21,7 @@ #include <linux/debugobjects.h> #include <linux/bug.h> #include <linux/compiler.h> +#include <linux/hrtimer.h> /* Definitions for a non-string torture-test module parameter. */ #define torture_param(type, name, init, msg) \ @@ -81,7 +82,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); @@ -108,19 +110,27 @@ bool torture_must_stop(void); bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, - char *f, struct task_struct **tp); + char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp)); void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_create_kthread(n, arg, tp) \ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ - "Failed to create " #n, &(tp)) + "Failed to create " #n, &(tp), NULL) +#define torture_create_kthread_cb(n, arg, tp, cbf) \ + _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ + "Failed to create " #n, &(tp), cbf) #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +/* Scheduler-related definitions. */ #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() __preempt_schedule() #else #define torture_preempt_schedule() do { } while (0) #endif +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +#endif + #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/trace.h b/include/linux/trace.h index 2a70a447184c..fdcd76b7be83 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); int trace_array_init_printk(struct trace_array *tr); void trace_array_put(struct trace_array *tr); -struct trace_array *trace_array_get_by_name(const char *name); +struct trace_array *trace_array_get_by_name(const char *name, const char *systems); int trace_array_destroy(struct trace_array *tr); /* For osnoise tracer */ @@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr) static inline void trace_array_put(struct trace_array *tr) { } -static inline struct trace_array *trace_array_get_by_name(const char *name) +static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { return NULL; } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 1e8bbdb8da90..6f9bdfb09d1d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -17,6 +17,9 @@ struct dentry; struct bpf_prog; union bpf_attr; +/* Used for event string fields when they are NULL */ +#define EVENT_NULL_STR "(null)" + const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -62,13 +65,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* Used to find the offset and length of dynamic fields in trace events */ struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN - u16 offset; u16 len; + u16 offset; #else - u16 len; u16 offset; + u16 len; #endif -}; +} __packed; /* * The trace entry - the most basic unit of tracing. This is what @@ -103,13 +106,16 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; - long wait_index; + atomic_t wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; cpumask_var_t started; + /* Set when the file is closed to prevent new waiters */ + bool closed; + /* it's true when current open file is snapshot */ bool snapshot; @@ -492,6 +498,7 @@ enum { EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, EVENT_FILE_FL_WAS_ENABLED_BIT, + EVENT_FILE_FL_FREED_BIT, }; extern struct trace_event_file *trace_get_event_file(const char *instance, @@ -630,6 +637,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...); * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid * WAS_ENABLED - Set when enabled to know to clear trace on module removal + * FREED - File descriptor is freed, all fields should be considered invalid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -643,13 +651,14 @@ enum { EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), + EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT), }; struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; - struct dentry *dir; + struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; @@ -671,6 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; + atomic_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; @@ -761,8 +771,10 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -800,7 +812,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -809,6 +821,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EOPNOTSUPP; } +static inline int +bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif enum { @@ -818,6 +835,7 @@ enum { FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, + FILTER_CPUMASK, FILTER_COMM, FILTER_CPU, FILTER_STACKTRACE, @@ -870,6 +888,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS @@ -878,7 +897,8 @@ extern int perf_uprobe_init(struct perf_event *event, extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, - u64 *probe_offset, bool perf_type_tracepoint); + u64 *probe_offset, u64 *probe_addr, + bool perf_type_tracepoint); #endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6be92bf559fe..1ef95c0287f0 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -8,20 +8,31 @@ /* * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE). + * to create a string of data to use. + * + * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on + * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is + * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the + * max size the output of a trace event may be. */ +#define TRACE_SEQ_SIZE 8192 +#define TRACE_SEQ_BUFFER_SIZE (TRACE_SEQ_SIZE - \ + (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) + struct trace_seq { - char buffer[PAGE_SIZE]; + char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; + size_t readpos; int full; }; static inline void trace_seq_init(struct trace_seq *s) { - seq_buf_init(&s->seq, s->buffer, PAGE_SIZE); + seq_buf_init(&s->seq, s->buffer, TRACE_SEQ_BUFFER_SIZE); s->full = 0; + s->readpos = 0; } /** diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 99912445974c..d03f74658716 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -21,6 +21,75 @@ struct file_operations; #ifdef CONFIG_TRACING +struct eventfs_file; + +/** + * eventfs_callback - A callback function to create dynamic files in eventfs + * @name: The name of the file that is to be created + * @mode: return the file mode for the file (RW access, etc) + * @data: data to pass to the created file ops + * @fops: the file operations of the created file + * + * The evetnfs files are dynamically created. The struct eventfs_entry array + * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will + * be used to create the files within those directories. When a lookup + * or access to a file within the directory is made, the struct eventfs_entry + * array is used to find a callback() with the matching name that is being + * referenced (for lookups, the entire array is iterated and each callback + * will be called). + * + * The callback will be called with @name for the name of the file to create. + * The callback can return less than 1 to indicate that no file should be + * created. + * + * If a file is to be created, then @mode should be populated with the file + * mode (permissions) for which the file is created for. This would be + * used to set the created inode i_mode field. + * + * The @data should be set to the data passed to the other file operations + * (read, write, etc). Note, @data will also point to the data passed in + * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback + * can replace the data if it chooses to. Otherwise, the original data + * will be used for the file operation functions. + * + * The @fops should be set to the file operations that will be used to create + * the inode. + * + * NB. This callback is called while holding internal locks of the eventfs + * system. The callback must not call any code that might also call into + * the tracefs or eventfs system or it will risk creating a deadlock. + */ +typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, + const struct file_operations **fops); + +typedef void (*eventfs_release)(const char *name, void *data); + +/** + * struct eventfs_entry - dynamically created eventfs file call back handler + * @name: Then name of the dynamic file in an eventfs directory + * @callback: The callback to get the fops of the file when it is created + * + * See evenfs_callback() typedef for how to set up @callback. + */ +struct eventfs_entry { + const char *name; + eventfs_callback callback; + eventfs_release release; +}; + +struct eventfs_inode; + +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data); + +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data); + +void eventfs_remove_events_dir(struct eventfs_inode *ei); +void eventfs_remove_dir(struct eventfs_inode *ei); + struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88c0ba623ee6..689b6d71590e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -199,7 +199,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \ + if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ + "Bad RCU usage for tracepoint")) \ return; \ \ /* keep srcu and sched-rcu usage consistent */ \ @@ -259,7 +260,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ON_ONCE(!rcu_is_watching()); \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ diff --git a/include/linux/tsm.h b/include/linux/tsm.h new file mode 100644 index 000000000000..de8324a2223c --- /dev/null +++ b/include/linux/tsm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TSM_H +#define __TSM_H + +#include <linux/sizes.h> +#include <linux/types.h> + +#define TSM_INBLOB_MAX 64 +#define TSM_OUTBLOB_MAX SZ_32K + +/* + * Privilege level is a nested permission concept to allow confidential + * guests to partition address space, 4-levels are supported. + */ +#define TSM_PRIVLEVEL_MAX 3 + +/** + * struct tsm_desc - option descriptor for generating tsm report blobs + * @privlevel: optional privilege level to associate with @outblob + * @inblob_len: sizeof @inblob + * @inblob: arbitrary input data + */ +struct tsm_desc { + unsigned int privlevel; + size_t inblob_len; + u8 inblob[TSM_INBLOB_MAX]; +}; + +/** + * struct tsm_report - track state of report generation relative to options + * @desc: input parameters to @report_new() + * @outblob_len: sizeof(@outblob) + * @outblob: generated evidence to provider to the attestation agent + * @auxblob_len: sizeof(@auxblob) + * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + */ +struct tsm_report { + struct tsm_desc desc; + size_t outblob_len; + u8 *outblob; + size_t auxblob_len; + u8 *auxblob; +}; + +/** + * struct tsm_ops - attributes and operations for tsm instances + * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider + * @privlevel_floor: convey base privlevel for nested scenarios + * @report_new: Populate @report with the report blob and auxblob + * (optional), return 0 on successful population, or -errno otherwise + * + * Implementation specific ops, only one is expected to be registered at + * a time i.e. only one of "sev-guest", "tdx-guest", etc. + */ +struct tsm_ops { + const char *name; + const unsigned int privlevel_floor; + int (*report_new)(struct tsm_report *report, void *data); +}; + +extern const struct config_item_type tsm_report_default_type; + +/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ +extern const struct config_item_type tsm_report_extra_type; + +int tsm_register(const struct tsm_ops *ops, void *priv, + const struct config_item_type *type); +int tsm_unregister(const struct tsm_ops *ops); +#endif /* __TSM_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index e8d5d9997aca..2b2e6f0a54d6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -6,7 +6,6 @@ #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> -#include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> @@ -192,13 +191,14 @@ struct tty_operations; */ struct tty_struct { struct kref kref; + int index; struct device *dev; struct tty_driver *driver; + struct tty_port *port; const struct tty_operations *ops; - int index; - struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; + struct ld_semaphore ldisc_sem; struct mutex atomic_write_lock; struct mutex legacy_mutex; @@ -209,6 +209,7 @@ struct tty_struct { char name[64]; unsigned long flags; int count; + unsigned int receive_room; struct winsize winsize; struct { @@ -219,16 +220,16 @@ struct tty_struct { } __aligned(sizeof(unsigned long)) flow; struct { - spinlock_t lock; struct pid *pgrp; struct pid *session; + spinlock_t lock; unsigned char pktstatus; bool packet; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) ctrl; bool hw_stopped; - unsigned int receive_room; + bool closing; int flow_change; struct tty_struct *link; @@ -239,15 +240,13 @@ struct tty_struct { void *disc_data; void *driver_data; spinlock_t files_lock; + int write_cnt; + u8 *write_buf; + struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 - - int closing; - unsigned char *write_buf; - int write_cnt; struct work_struct SAK_work; - struct tty_port *port; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ @@ -390,14 +389,14 @@ int vcs_init(void); extern const struct class tty_class; /** - * tty_kref_get - get a tty reference - * @tty: tty device + * tty_kref_get - get a tty reference + * @tty: tty device * - * Return a new reference to a tty object. The caller must hold - * sufficient locks/counts to ensure that their existing reference cannot - * go away + * Returns: a new reference to a tty object + * + * Locking: The caller must hold sufficient locks/counts to ensure that their + * existing reference cannot go away. */ - static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) @@ -410,17 +409,18 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout); void stop_tty(struct tty_struct *tty); void start_tty(struct tty_struct *tty); void tty_write_message(struct tty_struct *tty, char *msg); -int tty_send_xchar(struct tty_struct *tty, char ch); -int tty_put_char(struct tty_struct *tty, unsigned char c); +int tty_send_xchar(struct tty_struct *tty, u8 ch); +int tty_put_char(struct tty_struct *tty, u8 c); unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); -int tty_throttle_safe(struct tty_struct *tty); -int tty_unthrottle_safe(struct tty_struct *tty); +bool tty_throttle_safe(struct tty_struct *tty); +bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); +int tty_get_tiocm(struct tty_struct *tty); int is_current_pgrp_orphaned(void); void tty_hangup(struct tty_struct *tty); void tty_vhangup(struct tty_struct *tty); @@ -435,16 +435,14 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. + * Returns: the baud rate as an integer for this terminal * - * Locking: none + * Locking: The termios lock must be held by the caller. */ -static inline speed_t tty_get_baud_rate(struct tty_struct *tty) +static inline speed_t tty_get_baud_rate(const struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h index 6ceb2789e6c8..31125e3be3c5 100644 --- a/include/linux/tty_buffer.h +++ b/include/linux/tty_buffer.h @@ -12,24 +12,24 @@ struct tty_buffer { struct tty_buffer *next; struct llist_node free; }; - int used; - int size; - int commit; - int lookahead; /* Lazy update on recv, can become less than "read" */ - int read; + unsigned int used; + unsigned int size; + unsigned int commit; + unsigned int lookahead; /* Lazy update on recv, can become less than "read" */ + unsigned int read; bool flags; /* Data points here */ - unsigned long data[]; + u8 data[] __aligned(sizeof(unsigned long)); }; -static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs) +static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs) { - return ((unsigned char *)b->data) + ofs; + return b->data + ofs; } -static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) +static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs) { - return (char *)char_buf_ptr(b, ofs) + b->size; + return char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index e00034118c7b..7372124fbf90 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -72,8 +72,7 @@ struct serial_struct; * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * - * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf, - * int count)`` + * @write: ``ssize_t ()(struct tty_struct *tty, const u8 *buf, size_t count)`` * * This routine is called by the kernel to write a series (@count) of * characters (@buf) to the @tty device. The characters may come from @@ -85,7 +84,7 @@ struct serial_struct; * * Optional: Required for writable devices. May not sleep. * - * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)`` + * @put_char: ``int ()(struct tty_struct *tty, u8 ch)`` * * This routine is called by the kernel to write a single character @ch to * the @tty device. If the kernel uses this routine, it must call the @@ -243,7 +242,7 @@ struct serial_struct; * Optional: If not provided, the device is assumed to have no FIFO. * Usually correct to invoke via tty_wait_until_sent(). May sleep. * - * @send_xchar: ``void ()(struct tty_struct *tty, char ch)`` + * @send_xchar: ``void ()(struct tty_struct *tty, u8 ch)`` * * This routine is used to send a high-priority XON/XOFF character (@ch) * to the @tty device. @@ -356,9 +355,8 @@ struct tty_operations { void (*close)(struct tty_struct * tty, struct file * filp); void (*shutdown)(struct tty_struct *tty); void (*cleanup)(struct tty_struct *tty); - int (*write)(struct tty_struct * tty, - const unsigned char *buf, int count); - int (*put_char)(struct tty_struct *tty, unsigned char ch); + ssize_t (*write)(struct tty_struct *tty, const u8 *buf, size_t count); + int (*put_char)(struct tty_struct *tty, u8 ch); void (*flush_chars)(struct tty_struct *tty); unsigned int (*write_room)(struct tty_struct *tty); unsigned int (*chars_in_buffer)(struct tty_struct *tty); @@ -376,7 +374,7 @@ struct tty_operations { void (*flush_buffer)(struct tty_struct *tty); void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); - void (*send_xchar)(struct tty_struct *tty, char ch); + void (*send_xchar)(struct tty_struct *tty, u8 ch); int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index bfaaeee61a05..af4fce98f64e 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -10,17 +10,59 @@ struct tty_ldisc; int tty_buffer_set_limit(struct tty_port *port, int limit); unsigned int tty_buffer_space_avail(struct tty_port *port); int tty_buffer_request_room(struct tty_port *port, size_t size); -int tty_insert_flip_string_flags(struct tty_port *port, - const unsigned char *chars, const char *flags, size_t size); -int tty_insert_flip_string_fixed_flag(struct tty_port *port, - const unsigned char *chars, char flag, size_t size); -int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, - size_t size); +size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, + const u8 *flags, bool mutable_flags, + size_t size); +size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size); void tty_flip_buffer_push(struct tty_port *port); -int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); -static inline int tty_insert_flip_char(struct tty_port *port, - unsigned char ch, char flag) +/** + * tty_insert_flip_string_fixed_flag - add characters to the tty buffer + * @port: tty port + * @chars: characters + * @flag: flag value for each character + * @size: size + * + * Queue a series of bytes to the tty buffering. All the characters passed are + * marked with the supplied flag. + * + * Returns: the number added. + */ +static inline size_t tty_insert_flip_string_fixed_flag(struct tty_port *port, + const u8 *chars, u8 flag, + size_t size) +{ + return __tty_insert_flip_string_flags(port, chars, &flag, false, size); +} + +/** + * tty_insert_flip_string_flags - add characters to the tty buffer + * @port: tty port + * @chars: characters + * @flags: flag bytes + * @size: size + * + * Queue a series of bytes to the tty buffering. For each character the flags + * array indicates the status of the character. + * + * Returns: the number added. + */ +static inline size_t tty_insert_flip_string_flags(struct tty_port *port, + const u8 *chars, + const u8 *flags, size_t size) +{ + return __tty_insert_flip_string_flags(port, chars, flags, true, size); +} + +/** + * tty_insert_flip_char - add one character to the tty buffer + * @port: tty port + * @ch: character + * @flag: flag byte + * + * Queue a single byte @ch to the tty buffering, with an optional flag. + */ +static inline size_t tty_insert_flip_char(struct tty_port *port, u8 ch, u8 flag) { struct tty_buffer *tb = port->buf.tail; int change; @@ -32,17 +74,17 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return __tty_insert_flip_char(port, ch, flag); + return __tty_insert_flip_string_flags(port, &ch, &flag, false, 1); } -static inline int tty_insert_flip_string(struct tty_port *port, - const unsigned char *chars, size_t size) +static inline size_t tty_insert_flip_string(struct tty_port *port, + const u8 *chars, size_t size) { return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size); } -int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, - const char *f, int count); +size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, + size_t count); void tty_buffer_lock_exclusive(struct tty_port *port); void tty_buffer_unlock_exclusive(struct tty_port *port); diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 49dc172dedc7..af01e89074b2 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -71,7 +71,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * call to @receive_buf(). Returning an error will prevent the ldisc from * being attached. * - * Can sleep. + * Optional. Can sleep. * * @close: [TTY] ``void ()(struct tty_struct *tty)`` * @@ -80,7 +80,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * changed to use a new line discipline. At the point of execution no * further users will enter the ldisc code for this tty. * - * Can sleep. + * Optional. Can sleep. * * @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)`` * @@ -88,8 +88,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * input characters it may have queued to be delivered to the user mode * process. It may be called at any point between open and close. * - * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, - * unsigned char *buf, size_t nr)`` + * Optional. + * + * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, u8 *buf, + * size_t nr)`` * * This function is called when the user requests to read from the @tty. * The line discipline will return whatever characters it has buffered up @@ -97,10 +99,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * an %EIO error. Multiple read calls may occur in parallel and the ldisc * must deal with serialization issues. * - * Can sleep. + * Optional: %EIO unless provided. Can sleep. * * @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, - * const unsigned char *buf, size_t nr)`` + * const u8 *buf, size_t nr)`` * * This function is called when the user requests to write to the @tty. * The line discipline will deliver the characters to the low-level tty @@ -108,7 +110,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * characters first. If this function is not defined, the user will * receive an %EIO error. * - * Can sleep. + * Optional: %EIO unless provided. Can sleep. * * @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` @@ -120,6 +122,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * discpline. So a low-level driver can "grab" an ioctl request before * the line discpline has a chance to see it. * + * Optional. + * * @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, * unsigned long arg)`` * @@ -130,11 +134,15 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * a pointer to wordsize-sensitive structure belongs here, but most of * ldiscs will happily leave it %NULL. * + * Optional. + * * @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This function notifies the line discpline that a change has been made * to the termios structure. * + * Optional. + * * @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file, * struct poll_table_struct *wait)`` * @@ -142,6 +150,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * device. It is solely the responsibility of the line discipline to * handle poll requests. * + * Optional. + * * @hangup: [TTY] ``void ()(struct tty_struct *tty)`` * * Called on a hangup. Tells the discipline that it should cease I/O to @@ -149,10 +159,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * but should wait until any pending driver I/O is completed. No further * calls into the ldisc code will occur. * - * Can sleep. + * Optional. Can sleep. * - * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is @@ -161,6 +171,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * character was received with a parity error, etc. @fp may be %NULL to * indicate all data received is %TTY_NORMAL. * + * Optional. + * * @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)`` * * This function is called by the low-level tty driver to signal that line @@ -170,13 +182,17 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * send, please arise a tasklet or workqueue to do the real data transfer. * Do not send data in this hook, it may lead to a deadlock. * + * Optional. + * * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, bool active)`` * * Tells the discipline that the DCD pin has changed its status. Used * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. * - * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * Optional. + * + * @receive_buf2: [DRV] ``ssize_t ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver to send characters * received by the hardware to the line discpline for processing. @cp is a @@ -186,8 +202,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * indicate all data received is %TTY_NORMAL. If assigned, prefer this * function for automatic flow control. * - * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count)`` + * Optional. + * + * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp, + * const u8 *fp, size_t count)`` * * This function is called by the low-level tty driver for characters * not eaten by ->receive_buf() or ->receive_buf2(). It is useful for @@ -198,6 +216,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * same characters (e.g. by skipping the actions for high-priority * characters already handled by ->lookahead_buf()). * + * Optional. + * * @owner: module containting this ldisc (for reference counting) * * This structure defines the interface between the tty line discipline @@ -218,11 +238,10 @@ struct tty_ldisc_ops { int (*open)(struct tty_struct *tty); void (*close)(struct tty_struct *tty); void (*flush_buffer)(struct tty_struct *tty); - ssize_t (*read)(struct tty_struct *tty, struct file *file, - unsigned char *buf, size_t nr, - void **cookie, unsigned long offset); + ssize_t (*read)(struct tty_struct *tty, struct file *file, u8 *buf, + size_t nr, void **cookie, unsigned long offset); ssize_t (*write)(struct tty_struct *tty, struct file *file, - const unsigned char *buf, size_t nr); + const u8 *buf, size_t nr); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, @@ -235,14 +254,14 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, - const char *fp, int count); + void (*receive_buf)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); void (*write_wakeup)(struct tty_struct *tty); void (*dcd_change)(struct tty_struct *tty, bool active); - int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, - const char *fp, int count); - void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp, - const unsigned char *fp, unsigned int count); + size_t (*receive_buf2)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); + void (*lookahead_buf)(struct tty_struct *tty, const u8 *cp, + const u8 *fp, size_t count); struct module *owner; }; diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index edf685a24f7c..1b861f2100b6 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -39,9 +39,10 @@ struct tty_port_operations { }; struct tty_port_client_operations { - int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); - void (*lookahead_buf)(struct tty_port *port, const unsigned char *cp, - const unsigned char *fp, unsigned int count); + size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, + size_t count); + void (*lookahead_buf)(struct tty_port *port, const u8 *cp, + const u8 *fp, size_t count); void (*write_wakeup)(struct tty_port *port); }; @@ -113,8 +114,8 @@ struct tty_port { unsigned char console:1; struct mutex mutex; struct mutex buf_mutex; - unsigned char *xmit_buf; - DECLARE_KFIFO_PTR(xmit_fifo, unsigned char); + u8 *xmit_buf; + DECLARE_KFIFO_PTR(xmit_fifo, u8); unsigned int close_delay; unsigned int closing_wait; int drain_delay; @@ -148,10 +149,10 @@ struct device *tty_port_register_device_attr(struct tty_port *port, const struct attribute_group **attr_grp); struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, - struct device *device); + struct device *host, struct device *parent); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, - struct device *device, void *drvdata, + struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp); void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); diff --git a/include/linux/types.h b/include/linux/types.h index 253168bb3fe1..2bc8766ba20c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -120,6 +120,9 @@ typedef s64 int64_t; #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 +/* Nanosecond scalar representation for kernel time values */ +typedef s64 ktime_t; + /** * The type used for indexing onto a disc or disc partition. * diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index ffe48e69b3f3..457879938fc1 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cf3ada3e820e..c499ae809c7d 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -10,6 +10,7 @@ typedef u16 ucs2_char_t; unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); unsigned long ucs2_utf8size(const ucs2_char_t *src); diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..e398e1dbd2d3 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ - gro_enabled:1, /* Request GRO aggregation */ - accept_udp_l4:1, - accept_udp_fraglist:1; + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +67,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -93,30 +92,51 @@ struct udp_sock { /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; + + /* Cache friendly copy of sk->sk_peek_off >= 0 */ + bool peeking_with_offset; }; -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + +#define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) +static inline int udp_set_peek_off(struct sock *sk, int val) +{ + sk_set_peek_off(sk, val); + WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); + return 0; +} + static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, @@ -130,15 +150,45 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) + return true; + + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) return true; return false; @@ -146,8 +196,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index b0542cd11aeb..f85ec5613721 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -12,20 +12,12 @@ * to detect when we overlook these differences. * */ -#include <linux/types.h> +#include <linux/uidgid_types.h> #include <linux/highuid.h> struct user_namespace; extern struct user_namespace init_user_ns; - -typedef struct { - uid_t val; -} kuid_t; - - -typedef struct { - gid_t val; -} kgid_t; +struct uid_gid_map; #define KUIDT_INIT(value) (kuid_t){ value } #define KGIDT_INIT(value) (kgid_t){ value } @@ -138,6 +130,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) return from_kgid(ns, gid) != (gid_t) -1; } +u32 map_id_down(struct uid_gid_map *map, u32 id); +u32 map_id_up(struct uid_gid_map *map, u32 id); + #else static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid) @@ -186,6 +181,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) return gid_valid(gid); } +static inline u32 map_id_down(struct uid_gid_map *map, u32 id) +{ + return id; +} + +static inline u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + return id; +} #endif /* CONFIG_USER_NS */ #endif /* _LINUX_UIDGID_H */ diff --git a/include/linux/uidgid_types.h b/include/linux/uidgid_types.h new file mode 100644 index 000000000000..b35ac4955a33 --- /dev/null +++ b/include/linux/uidgid_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UIDGID_TYPES_H +#define _LINUX_UIDGID_TYPES_H + +#include <linux/types.h> + +typedef struct { + uid_t val; +} kuid_t; + +typedef struct { + gid_t val; +} kgid_t; + +#endif /* _LINUX_UIDGID_TYPES_H */ diff --git a/include/linux/uio.h b/include/linux/uio.h index ff81e5ccaef2..00cebe2b70de 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,12 +21,12 @@ struct kvec { enum iter_type { /* iter types */ + ITER_UBUF, ITER_IOVEC, - ITER_KVEC, ITER_BVEC, + ITER_KVEC, ITER_XARRAY, ITER_DISCARD, - ITER_UBUF, }; #define ITER_SOURCE 1 // == WRITE @@ -40,14 +40,9 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; - bool copy_mc; bool nofault; bool data_source; - bool user_backed; - union { - size_t iov_offset; - int last_offset; - }; + size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type @@ -143,7 +138,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) static inline bool user_backed_iter(const struct iov_iter *i) { - return i->user_backed; + return iter_is_ubuf(i) || iter_is_iovec(i); } /* @@ -163,7 +158,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, +size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); @@ -184,6 +179,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, { return copy_page_to_iter(&folio->page, offset, bytes, i); } + +static inline size_t copy_folio_from_iter_atomic(struct folio *folio, + size_t offset, size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); +} + size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); @@ -245,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -static inline void iov_iter_set_copy_mc(struct iov_iter *i) -{ - i->copy_mc = true; -} - -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return i->copy_mc; -} #else #define _copy_mc_to_iter _copy_to_iter -static inline void iov_iter_set_copy_mc(struct iov_iter *i) { } -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return false; -} #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); @@ -335,27 +323,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i); - struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); @@ -365,8 +332,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec, ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat); -int import_single_range(int type, void __user *buf, size_t len, - struct iovec *iov, struct iov_iter *i); int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, @@ -375,8 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, - .copy_mc = false, - .user_backed = true, .data_source = direction, .ubuf = buf, .count = count, diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 47c5962b876b..18238dc8bfd3 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -28,19 +28,26 @@ struct uio_map; * logical, virtual, or physical & phys_addr_t * should always be large enough to handle any of * the address types) + * @dma_addr: DMA handle set by dma_alloc_coherent, used with + * UIO_MEM_DMA_COHERENT only (@addr should be the + * void * returned from the same dma_alloc_coherent call) * @offs: offset of device memory within the page * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to * @internal_addr: ioremap-ped version of addr, for driver internal use + * @dma_device: device struct that was passed to dma_alloc_coherent, + * used with UIO_MEM_DMA_COHERENT only * @map: for use by the UIO core only. */ struct uio_mem { const char *name; phys_addr_t addr; + dma_addr_t dma_addr; unsigned long offs; resource_size_t size; int memtype; void __iomem *internal_addr; + struct device *dma_device; struct uio_map *map; }; @@ -158,6 +165,12 @@ extern int __must_check #define UIO_MEM_LOGICAL 2 #define UIO_MEM_VIRTUAL 3 #define UIO_MEM_IOVA 4 +/* + * UIO_MEM_DMA_COHERENT exists for legacy drivers that had been getting by with + * improperly mapping DMA coherent allocations through the other modes. + * Do not use in new drivers. + */ +#define UIO_MEM_DMA_COHERENT 5 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 diff --git a/include/linux/units.h b/include/linux/units.h index 2793a41e73a2..00e15de33eca 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,6 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H +#include <linux/bits.h> #include <linux/math.h> /* Metric prefixes in accordance with Système international (d'unités) */ @@ -23,14 +24,21 @@ #define NANOHZ_PER_HZ 1000000000UL #define MICROHZ_PER_HZ 1000000UL #define MILLIHZ_PER_HZ 1000UL + #define HZ_PER_KHZ 1000UL -#define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL +#define KHZ_PER_MHZ 1000UL +#define KHZ_PER_GHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL +#define BYTES_PER_KBIT (KILO / BITS_PER_BYTE) +#define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE) +#define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE) + #define ABSOLUTE_ZERO_MILLICELSIUS -273150 static inline long milli_kelvin_to_millicelsius(long t) diff --git a/include/linux/usb.h b/include/linux/usb.h index 25f8e62a30ec..9e52179872a5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -25,7 +25,6 @@ struct usb_device; struct usb_driver; -struct wusb_dev; /*-------------------------------------------------------------------------*/ @@ -425,7 +424,6 @@ struct usb_host_config { struct usb_host_bos { struct usb_bos_descriptor *desc; - /* wireless cap descriptor is handled by wusb */ struct usb_ext_cap_descriptor *ext_cap; struct usb_ss_cap_descriptor *ss_cap; struct usb_ssp_cap_descriptor *ssp_cap; @@ -612,7 +610,6 @@ struct usb3_lpm_parameters { * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed - * @wusb: device is Wireless USB * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -634,10 +631,7 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) - * @wusb_dev: if this is a Wireless USB device, link to the WUSB - * specific data for the device. * @slot_id: Slot ID assigned by xHCI - * @removable: Device can be physically removed from this port * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. * @u2_params: exit latencies for USB3 U2 LPM state, and hub-initiated timeout. @@ -696,7 +690,6 @@ struct usb_device { unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; - unsigned wusb:1; unsigned lpm_capable:1; unsigned lpm_devinit_allow:1; unsigned usb2_hw_lpm_capable:1; @@ -727,7 +720,6 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; - struct wusb_dev *wusb_dev; int slot_id; struct usb2_lpm_parameters l1_params; struct usb3_lpm_parameters u1_params; @@ -1152,16 +1144,6 @@ extern ssize_t usb_store_new_id(struct usb_dynids *dynids, extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf); /** - * struct usbdrv_wrap - wrapper for driver-model structure - * @driver: The driver-model core driver structure. - * @for_devices: Non-zero for device drivers, 0 for interface drivers. - */ -struct usbdrv_wrap { - struct device_driver driver; - int for_devices; -}; - -/** * struct usb_driver - identifies USB interface driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. @@ -1201,7 +1183,7 @@ struct usbdrv_wrap { * is bound to the driver. * @dynids: used internally to hold the list of dynamically added device * ids for this driver. - * @drvwrap: Driver-model core structure wrapper. + * @driver: The driver-model core driver structure. * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be * added to this driver by preventing the sysfs file from being created. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend @@ -1249,13 +1231,13 @@ struct usb_driver { const struct attribute_group **dev_groups; struct usb_dynids dynids; - struct usbdrv_wrap drvwrap; + struct device_driver driver; unsigned int no_dynamic_id:1; unsigned int supports_autosuspend:1; unsigned int disable_hub_initiated_lpm:1; unsigned int soft_unbind:1; }; -#define to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver) +#define to_usb_driver(d) container_of(d, struct usb_driver, driver) /** * struct usb_device_driver - identifies USB device driver to usbcore @@ -1271,9 +1253,12 @@ struct usb_driver { * module is being unloaded. * @suspend: Called when the device is going to be suspended by the system. * @resume: Called when the device is being resumed by the system. + * @choose_configuration: If non-NULL, called instead of the default + * usb_choose_configuration(). If this returns an error then we'll go + * on to call the normal usb_choose_configuration(). * @dev_groups: Attributes attached to the device that will be created once it * is bound to the driver. - * @drvwrap: Driver-model core structure wrapper. + * @driver: The driver-model core driver structure. * @id_table: used with @match() to select better matching driver at * probe() time. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend @@ -1282,7 +1267,7 @@ struct usb_driver { * resume and suspend functions will be called in addition to the driver's * own, so this part of the setup does not need to be replicated. * - * USB drivers must provide all the fields listed above except drvwrap, + * USB drivers must provide all the fields listed above except driver, * match, and id_table. */ struct usb_device_driver { @@ -1294,14 +1279,17 @@ struct usb_device_driver { int (*suspend) (struct usb_device *udev, pm_message_t message); int (*resume) (struct usb_device *udev, pm_message_t message); + + int (*choose_configuration) (struct usb_device *udev); + const struct attribute_group **dev_groups; - struct usbdrv_wrap drvwrap; + struct device_driver driver; const struct usb_device_id *id_table; unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ - drvwrap.driver) + driver) /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number @@ -1742,11 +1730,6 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per * millisecond). - * - * Wireless USB also uses the logarithmic encoding, but specifies it in units of - * 128us instead of 125us. For Wireless USB devices, the interval is passed - * through to the host controller, rather than being translated into microframe - * units. */ static inline void usb_fill_int_urb(struct urb *urb, struct usb_device *dev, @@ -1835,22 +1818,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); -#if 0 -struct urb *usb_buffer_map(struct urb *urb); -void usb_buffer_dmasync(struct urb *urb); -void usb_buffer_unmap(struct urb *urb); -#endif - -struct scatterlist; -int usb_buffer_map_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int nents); -#if 0 -void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); -#endif -void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); - /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ca796dc1a984..6e5555610010 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -82,7 +82,7 @@ struct uac_clock_source_descriptor { #define UAC_CLOCK_SOURCE_TYPE_INT_PROG 0x3 #define UAC_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 2) -/* 4.7.2.2 Clock Source Descriptor */ +/* 4.7.2.2 Clock Selector Descriptor */ struct uac_clock_selector_descriptor { __u8 bLength; @@ -91,7 +91,7 @@ struct uac_clock_selector_descriptor { __u8 bClockID; __u8 bNrInPins; __u8 baCSourceID[]; - /* bmControls and iClockSource omitted */ + /* bmControls and iClockSelector omitted */ } __attribute__((packed)); /* 4.7.2.3 Clock Multiplier Descriptor */ diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 969e7dba6358..c93b410b314a 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -3,7 +3,7 @@ * This file holds USB constants and structures that are needed for * USB device APIs. These are used by the USB device model, which is * defined in chapter 9 of the USB 2.0 specification and in the - * Wireless USB 1.0 (spread around). Linux has several APIs in C that + * Wireless USB 1.0 spec (now defunct). Linux has several APIs in C that * need these: * * - the host side Linux-USB kernel driver API; @@ -14,9 +14,6 @@ * act either as a USB host or as a USB device. That means the host and * device side APIs benefit from working well together. * - * There's also "Wireless USB", using low power short range radios for - * peripheral interconnection but otherwise building on the USB framework. - * * Note all descriptors are declared '__attribute__((packed))' so that: * * [a] they never get padded, either internally (USB spec writers diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index ee38835ed77c..5a7f96684ea2 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -63,6 +63,8 @@ struct ci_hdrc_platform_data { #define CI_HDRC_IMX_IS_HSIC BIT(14) #define CI_HDRC_PMQOS BIT(15) #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) +#define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) +#define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 07531c4f4350..af3cd2aae4bc 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -35,6 +35,14 @@ * are ready. The control transfer will then be kept from completing till * all the function drivers that requested for USB_GADGET_DELAYED_STAUS * invoke usb_composite_setup_continue(). + * + * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they + * must delay completing the status stage for 0-length control transfers + * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from + * the gadget driver's setup() callback. + * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS, + * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS + * must be contained within the composite framework. */ #define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ @@ -450,29 +458,6 @@ static inline struct usb_composite_driver *to_cdriver( * * One of these devices is allocated and initialized before the * associated device driver's bind() is called. - * - * OPEN ISSUE: it appears that some WUSB devices will need to be - * built by combining a normal (wired) gadget with a wireless one. - * This revision of the gadget framework should probably try to make - * sure doing that won't hurt too much. - * - * One notion for how to handle Wireless USB devices involves: - * - * (a) a second gadget here, discovery mechanism TBD, but likely - * needing separate "register/unregister WUSB gadget" calls; - * (b) updates to usb_gadget to include flags "is it wireless", - * "is it wired", plus (presumably in a wrapper structure) - * bandgroup and PHY info; - * (c) presumably a wireless_ep wrapping a usb_ep, and reporting - * wireless-specific parameters like maxburst and maxsequence; - * (d) configurations that are specific to wireless links; - * (e) function drivers that understand wireless configs and will - * support wireless for (additional) function instances; - * (f) a function to support association setup (like CBAF), not - * necessarily requiring a wireless adapter; - * (g) composite device setup that can create one or more wireless - * configs, including appropriate association setup support; - * (h) more, TBD. */ struct usb_composite_dev { struct usb_gadget *gadget; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 75bda0783395..56dda8e1562d 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -52,6 +52,7 @@ struct usb_ep; * @short_not_ok: When reading data, makes short packets be * treated as errors (queue stops advancing till cleanup). * @dma_mapped: Indicates if request has been mapped to DMA (internal) + * @sg_was_mapped: Set if the scatterlist has been mapped before the request * @complete: Function called when request completes, so this request and * its buffer may be re-used. The function will always be called with * interrupts disabled, and it must not sleep. @@ -111,6 +112,7 @@ struct usb_request { unsigned zero:1; unsigned short_not_ok:1; unsigned dma_mapped:1; + unsigned sg_was_mapped:1; void (*complete)(struct usb_ep *ep, struct usb_request *req); @@ -711,6 +713,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget) * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * + * The gadget driver's setup() callback does not have to queue a response to + * ep0 within the setup() call, the driver can do it after setup() returns. + * The UDC driver must wait until such a response is queued before proceeding + * with the data/status stages of the control transfer. + * + * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS + * being returned from the setup() callback, which is a bug. See the comment + * next to USB_GADGET_DELAYED_STATUS for details. + * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 4e9623e8492b..ac95e7c89df5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -55,7 +55,7 @@ struct giveback_urb_bh { bool high_prio; spinlock_t lock; struct list_head head; - struct tasklet_struct bh; + struct work_struct bh; struct usb_host_endpoint *completing_ep; }; @@ -154,7 +154,6 @@ struct usb_hcd { /* The next flag is a stopgap, to be removed when all the HCDs * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; - unsigned wireless:1; /* Wireless USB HCD */ unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ @@ -249,7 +248,6 @@ struct hc_driver { #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ -#define HCD_USB25 0x0030 /* Wireless USB 1.0 (USB 2.5)*/ #define HCD_USB3 0x0040 /* USB 3.0 */ #define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_USB32 0x0060 /* USB 3.2 */ @@ -374,8 +372,9 @@ struct hc_driver { * or bandwidth constraints. */ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); - /* Returns the hardware-chosen device address */ - int (*address_device)(struct usb_hcd *, struct usb_device *udev); + /* Set the hardware-chosen device address */ + int (*address_device)(struct usb_hcd *, struct usb_device *udev, + unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. @@ -486,8 +485,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); +#ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + if (!usb_hcd_amd_remote_wakeup_quirk(dev)) + return false; + if (driver->flags & (HCD_USB11 | HCD_USB3)) + return true; + return false; +} +#else /* CONFIG_USB_PCI_AMD */ +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + return false; +} +#endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h new file mode 100644 index 000000000000..47661feda96c --- /dev/null +++ b/include/linux/usb/ljca.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef _LINUX_USB_LJCA_H_ +#define _LINUX_USB_LJCA_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define LJCA_MAX_GPIO_NUM 64 + +#define auxiliary_dev_to_ljca_client(auxiliary_dev) \ + container_of(auxiliary_dev, struct ljca_client, auxdev) + +struct ljca_adapter; + +/** + * typedef ljca_event_cb_t - event callback function signature + * + * @context: the execution context of who registered this callback + * @cmd: the command from device for this event + * @evt_data: the event data payload + * @len: the event data payload length + * + * The callback function is called in interrupt context and the data payload is + * only valid during the call. If the user needs later access of the data, it + * must copy it. + */ +typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len); + +/** + * struct ljca_client - represent a ljca client device + * + * @type: ljca client type + * @id: ljca client id within same client type + * @link: ljca client on the same ljca adapter + * @auxdev: auxiliary device object + * @adapter: ljca adapter the ljca client sit on + * @context: the execution context of the event callback + * @event_cb: ljca client driver register this callback to get + * firmware asynchronous rx buffer pending notifications + * @event_cb_lock: spinlock to protect event callback + */ +struct ljca_client { + u8 type; + u8 id; + struct list_head link; + struct auxiliary_device auxdev; + struct ljca_adapter *adapter; + + void *context; + ljca_event_cb_t event_cb; + /* lock to protect event_cb */ + spinlock_t event_cb_lock; +}; + +/** + * struct ljca_gpio_info - ljca gpio client device info + * + * @num: ljca gpio client device pin number + * @valid_pin_map: ljca gpio client device valid pin mapping + */ +struct ljca_gpio_info { + unsigned int num; + DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM); +}; + +/** + * struct ljca_i2c_info - ljca i2c client device info + * + * @id: ljca i2c client device identification number + * @capacity: ljca i2c client device capacity + * @intr_pin: ljca i2c client device interrupt pin number if exists + */ +struct ljca_i2c_info { + u8 id; + u8 capacity; + u8 intr_pin; +}; + +/** + * struct ljca_spi_info - ljca spi client device info + * + * @id: ljca spi client device identification number + * @capacity: ljca spi client device capacity + */ +struct ljca_spi_info { + u8 id; + u8 capacity; +}; + +/** + * ljca_register_event_cb - register a callback function to receive events + * + * @client: ljca client device + * @event_cb: callback function + * @context: execution context of event callback + * + * Return: 0 in case of success, negative value in case of error + */ +int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, void *context); + +/** + * ljca_unregister_event_cb - unregister the callback function for an event + * + * @client: ljca client device + */ +void ljca_unregister_event_cb(struct ljca_client *client); + +/** + * ljca_transfer - issue a LJCA command and wait for a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device; it should + * be 0 when obuf is NULL + * @ibuf: any data associated with the response will be copied here; it can be + * NULL if the user doesn't need the response data + * @ibuf_len: must be initialized to the input buffer size + * + * Return: the actual length of response data for success, negative value for errors + */ +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len); + +/** + * ljca_transfer_noack - issue a LJCA command without a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device + * + * Return: 0 for success, negative value for errors + */ +int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len); + +#endif diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 98487fd7ab11..de42f14bd280 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -6,6 +6,7 @@ #ifndef __LINUX_USB_OF_H #define __LINUX_USB_OF_H +#include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/otg.h> #include <linux/usb/phy.h> @@ -17,6 +18,7 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); +enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1); struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1); bool usb_of_has_combined_node(struct usb_device *udev); struct device_node *usb_of_get_interface_node(struct usb_device *udev, @@ -37,6 +39,11 @@ static inline int of_usb_update_otg_caps(struct device_node *np, { return 0; } +static inline enum usb_port_connect_type +usb_of_get_connect_type(const struct usb_device *hub, int port1) +{ + return USB_PORT_CONNECT_TYPE_UNKNOWN; +} static inline struct device_node * usb_of_get_device_node(struct usb_device *hub, int port1) { diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index c59fb79a42e8..d50098fb16b5 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -228,6 +228,7 @@ enum pd_pdo_type { #define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */ #define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ #define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_PEAK_CURR_SHIFT 20 #define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ #define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ @@ -482,6 +483,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */ #define PD_T_SINK_TX 16 /* 16 - 20 ms */ #define PD_T_CHUNK_NOT_SUPP 42 /* 40 - 50 ms */ +#define PD_T_VCONN_STABLE 50 #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index b057250704e8..5c48e8a81403 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -7,6 +7,7 @@ #define __LINUX_USB_PD_VDO_H #include "pd.h" +#include <linux/bitfield.h> /* * VDO : Vendor Defined Message Object @@ -86,12 +87,15 @@ * * Request is simply properly formatted SVDM header * - * Response is 4 data objects: + * Response is 4 data objects for Power Delivery 2.0 and Passive Cables for + * Power Delivery 3.0. Active Cables in Power Delivery 3.0 have 5 data objects. * [0] :: SVDM header * [1] :: Identitiy header * [2] :: Cert Stat VDO * [3] :: (Product | Cable) VDO + * [4] :: Cable VDO 1 * [4] :: AMA VDO + * [5] :: Cable VDO 2 * */ #define VDO_INDEX_HDR 0 @@ -100,6 +104,8 @@ #define VDO_INDEX_CABLE 3 #define VDO_INDEX_PRODUCT 3 #define VDO_INDEX_AMA 4 +#define VDO_INDEX_CABLE_1 4 +#define VDO_INDEX_CABLE_2 5 /* * SVDM Identity Header @@ -150,6 +156,7 @@ #define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) #define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7) #define PD_IDH_CONN_TYPE(vdo) (((vdo) >> 21) & 0x3) +#define PD_IDH_HOST_SUPP(vdo) ((vdo) & (1 << 31)) /* * Cert Stat VDO @@ -182,7 +189,7 @@ * <5:3> :: Alternate modes * <2:0> :: USB highest speed */ -#define PD_VDO_UFP_DEVCAP(vdo) (((vdo) & GENMASK(27, 24)) >> 24) +#define PD_VDO_UFP_DEVCAP(vdo) FIELD_GET(GENMASK(27, 24), vdo) /* UFP VDO Version */ #define UFP_VDO_VER1_2 2 @@ -241,7 +248,7 @@ * <21:5> :: Reserved * <4:0> :: Port number */ -#define PD_VDO_DFP_HOSTCAP(vdo) (((vdo) & GENMASK(26, 24)) >> 24) +#define PD_VDO_DFP_HOSTCAP(vdo) FIELD_GET(GENMASK(26, 24), vdo) #define DFP_VDO_VER1_1 1 #define HOST_USB2_CAPABLE BIT(0) @@ -376,6 +383,7 @@ | ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \ | (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7)) +#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7) #define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index eeb7c2157c72..59409c1fc3de 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -72,4 +72,7 @@ /* device has endpoints that should be ignored */ #define USB_QUIRK_ENDPOINT_IGNORE BIT(15) +/* short SET_ADDRESS request timeout */ +#define USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT BIT(16) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 20d88b1defc3..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -29,6 +29,8 @@ #define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 +#define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d418c55523a7..372898d9eeb0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -5,16 +5,6 @@ * Copyright (C) 2011 Renesas Solutions Corp. * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef RENESAS_USB_H #define RENESAS_USB_H diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 85e95a3251d3..47a86b8a4a50 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -36,7 +36,9 @@ #define TCPC_ALERT_MASK 0x12 #define TCPC_POWER_STATUS_MASK 0x14 -#define TCPC_FAULT_STATUS_MASK 0x15 + +#define TCPC_FAULT_STATUS_MASK 0x15 +#define TCPC_FAULT_STATUS_MASK_VCONN_OC BIT(1) #define TCPC_EXTENDED_STATUS_MASK 0x16 #define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0) @@ -103,6 +105,8 @@ #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f +#define TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT BIT(7) +#define TCPC_FAULT_STATUS_VCONN_OC BIT(1) #define TCPC_ALERT_EXTENDED 0x21 @@ -141,6 +145,7 @@ #define TCPC_RX_BYTE_CNT 0x30 #define TCPC_RX_BUF_FRAME_TYPE 0x31 #define TCPC_RX_BUF_FRAME_TYPE_SOP 0 +#define TCPC_RX_BUF_FRAME_TYPE_SOP1 1 #define TCPC_RX_HDR 0x32 #define TCPC_RX_DATA 0x34 /* through 0x4f */ @@ -194,12 +199,23 @@ struct tcpci; * Chip level drivers are expected to check for contaminant and call * tcpm_clean_port when the port is clean to put the port back into * toggling state. + * @cable_comm_capable + * optional; Set when TCPC can communicate with cable plugs over SOP' + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs. + * Return true when the TCPM is allowed to request a Vconn swap + * after Discovery Identity on SOP. */ struct tcpci_data { struct regmap *regmap; unsigned char TX_BUF_BYTE_x_hidden:1; unsigned char auto_discharge_disconnect:1; unsigned char vbus_vsafe0v:1; + unsigned char cable_comm_capable:1; int (*init)(struct tcpci *tcpci, struct tcpci_data *data); int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data, @@ -211,6 +227,7 @@ struct tcpci_data { void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, bool capable); void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data); + bool (*attempt_vconn_swap_discovery)(struct tcpci *tcpci, struct tcpci_data *data); }; struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index ab7ca872950b..061da9546a81 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -119,6 +119,17 @@ enum tcpm_transmit_type { * at the end of the deboumce period or when the port is still * toggling. Chip level drivers are expected to check for contaminant * and call tcpm_clean_port when the port is clean. + * @cable_comm_capable + * Optional; Returns whether cable communication over SOP' is supported + * by the tcpc + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs. + * Return true when the TCPM is allowed to request a Vconn swap + * after Discovery Identity on SOP. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -133,6 +144,8 @@ struct tcpc_dev { enum typec_cc_status *cc2); int (*set_polarity)(struct tcpc_dev *dev, enum typec_cc_polarity polarity); + int (*set_orientation)(struct tcpc_dev *dev, + enum typec_orientation orientation); int (*set_vconn)(struct tcpc_dev *dev, bool on); int (*set_vbus)(struct tcpc_dev *dev, bool on, bool charge); int (*set_current_limit)(struct tcpc_dev *dev, u32 max_ma, u32 mv); @@ -154,6 +167,8 @@ struct tcpc_dev { bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable); void (*check_contaminant)(struct tcpc_dev *dev); + bool (*cable_comm_capable)(struct tcpc_dev *dev); + bool (*attempt_vconn_swap_discovery)(struct tcpc_dev *dev); }; struct tcpm_port; @@ -166,12 +181,14 @@ void tcpm_cc_change(struct tcpm_port *port); void tcpm_sink_frs(struct tcpm_port *port); void tcpm_sourcing_vbus(struct tcpm_port *port); void tcpm_pd_receive(struct tcpm_port *port, - const struct pd_message *msg); + const struct pd_message *msg, + enum tcpm_transmit_type rx_sop_type); void tcpm_pd_transmit_complete(struct tcpm_port *port, enum tcpm_transmit_status status); void tcpm_pd_hard_reset(struct tcpm_port *port); void tcpm_tcpc_reset(struct tcpm_port *port); void tcpm_port_clean(struct tcpm_port *port); bool tcpm_port_is_toggling(struct tcpm_port *port); +void tcpm_port_error_recovery(struct tcpm_port *port); #endif /* __LINUX_USB_TCPM_H */ diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 8fa781207970..b35b427561ab 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -18,6 +18,7 @@ struct typec_cable; struct typec_plug; struct typec_port; struct typec_altmode_ops; +struct typec_cable_ops; struct fwnode_handle; struct device; @@ -157,6 +158,9 @@ void typec_port_register_altmodes(struct typec_port *port, const struct typec_altmode_ops *ops, void *drvdata, struct typec_altmode **altmodes, size_t n); +void typec_port_register_cable_ops(struct typec_altmode **altmodes, int max_altmodes, + const struct typec_cable_ops *ops); + void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); @@ -202,6 +206,8 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @attach: Notification about attached USB device + * @deattach: Notification about removed USB device * * Details about a partner that is attached to USB Type-C port. If @identity * member exists when partner is registered, a directory named "identity" is @@ -217,6 +223,9 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + + void (*attach)(struct typec_partner *partner, struct device *dev); + void (*deattach)(struct typec_partner *partner, struct device *dev); }; /** @@ -328,6 +337,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); +int typec_get_cable_svdm_version(struct typec_port *port); +void typec_cable_set_svdm_version(struct typec_cable *cable, enum usb_pd_svdm_ver svdm_version); + struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner, struct usb_power_delivery_desc *desc); @@ -335,4 +347,36 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +/** + * struct typec_connector - Representation of Type-C port for external drivers + * @attach: notification about device removal + * @deattach: notification about device removal + * + * Drivers that control the USB and other ports (DisplayPorts, etc.), that are + * connected to the Type-C connectors, can use these callbacks to inform the + * Type-C connector class about connections and disconnections. That information + * can then be used by the typec-port drivers to power on or off parts that are + * needed or not needed - as an example, in USB mode if USB2 device is + * enumerated, USB3 components (retimers, phys, and what have you) do not need + * to be powered on. + * + * The attached (enumerated) devices will be liked with the typec-partner device. + */ +struct typec_connector { + void (*attach)(struct typec_connector *con, struct device *dev); + void (*deattach)(struct typec_connector *con, struct device *dev); +}; + +static inline void typec_attach(struct typec_connector *con, struct device *dev) +{ + if (con && con->attach) + con->attach(con, dev); +} + +static inline void typec_deattach(struct typec_connector *con, struct device *dev) +{ + if (con && con->deattach) + con->deattach(con, dev); +} + #endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 350d49012659..b3c0866ea70f 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -20,6 +20,7 @@ struct typec_altmode_ops; * @active: Tells has the mode been entered or not * @desc: Optional human readable description of the mode * @ops: Operations vector from the driver + * @cable_ops: Cable operations vector from the driver. */ struct typec_altmode { struct device dev; @@ -30,6 +31,7 @@ struct typec_altmode { char *desc; const struct typec_altmode_ops *ops; + const struct typec_cable_ops *cable_ops; }; #define to_typec_altmode(d) container_of(d, struct typec_altmode, dev) @@ -67,7 +69,7 @@ struct typec_altmode_ops { int typec_altmode_enter(struct typec_altmode *altmode, u32 *vdo); int typec_altmode_exit(struct typec_altmode *altmode); -void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo); +int typec_altmode_attention(struct typec_altmode *altmode, u32 vdo); int typec_altmode_vdm(struct typec_altmode *altmode, const u32 header, const u32 *vdo, int count); int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf, @@ -75,6 +77,34 @@ int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf, const struct typec_altmode * typec_altmode_get_partner(struct typec_altmode *altmode); +/** + * struct typec_cable_ops - Cable alternate mode operations vector + * @enter: Operations to be executed with Enter Mode Command + * @exit: Operations to be executed with Exit Mode Command + * @vdm: Callback for SVID specific commands + */ +struct typec_cable_ops { + int (*enter)(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); + int (*exit)(struct typec_altmode *altmode, enum typec_plug_index sop); + int (*vdm)(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 hdr, const u32 *vdo, int cnt); +}; + +int typec_cable_altmode_enter(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); +int typec_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop); +int typec_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 header, const u32 *vdo, int count); + +/** + * typec_altmode_get_cable_svdm_version - Get negotiated SVDM version for cable plug + * @altmode: Handle to the alternate mode + */ +static inline int +typec_altmode_get_cable_svdm_version(struct typec_altmode *altmode) +{ + return typec_get_cable_svdm_version(typec_altmode2port(altmode)); +} + /* * These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C * Specification. SVID specific connector states are expected to follow and diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 8d09c2f0a9b8..f2da264d9c14 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_DP_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_DP_SID 0xff01 /* USB IF has not assigned a Standard ID (SID) for VirtualLink, @@ -67,17 +68,26 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */ -#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */ +#define DP_CAP_DP_SIGNALLING(_cap_) FIELD_GET(GENMASK(5, 2), _cap_) +#define DP_CAP_SIGNALLING_HBR3 1 +#define DP_CAP_SIGNALLING_UHBR10 2 +#define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) -#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) -#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(15, 8), _cap_) +#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(23, 16), _cap_) /* Get pin assignment taking plug & receptacle into consideration */ #define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_UHBR_13_5_SUPPORT BIT(26) +#define DP_CAP_CABLE_TYPE(_cap_) FIELD_GET(GENMASK(29, 28), _cap_) +#define DP_CAP_CABLE_TYPE_PASSIVE 0 +#define DP_CAP_CABLE_TYPE_RE_TIMER 1 +#define DP_CAP_CABLE_TYPE_RE_DRIVER 2 +#define DP_CAP_CABLE_TYPE_OPTICAL 3 +#define DP_CAP_DPAM_VERSION BIT(30) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) @@ -97,13 +107,24 @@ enum { #define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3) #define DP_CONF_UFP_U_AS_DFP_D BIT(0) #define DP_CONF_UFP_U_AS_UFP_D BIT(1) -#define DP_CONF_SIGNALING_DP BIT(2) -#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */ +#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2) +#define DP_CONF_SIGNALLING_SHIFT 2 +#define DP_CONF_SIGNALLING_HBR3 1 +#define DP_CONF_SIGNALLING_UHBR10 2 +#define DP_CONF_SIGNALLING_UHBR20 3 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8 #define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8) /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) -#define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_GET_PIN_ASSIGN(_conf_) FIELD_GET(GENMASK(15, 8), _conf_) +#define DP_CONF_UHBR13_5_SUPPORT BIT(26) +#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) +#define DP_CONF_CABLE_TYPE_SHIFT 28 +#define DP_CONF_CABLE_TYPE_PASSIVE 0 +#define DP_CONF_CABLE_TYPE_RE_TIMER 1 +#define DP_CONF_CABLE_TYPE_RE_DRIVER 2 +#define DP_CONF_CABLE_TYPE_OPTICAL 3 +#define DP_CONF_DPAM_VERSION BIT(30) #endif /* __USB_TYPEC_DP_H */ diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 63dd44b72e0c..fa97d7e00f5c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_TBT_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_VENDOR_INTEL 0x8087 /* Alias for convenience */ @@ -25,7 +26,7 @@ struct typec_thunderbolt_data { /* TBT3 Device Discover Mode VDO bits */ #define TBT_MODE BIT(0) -#define TBT_ADAPTER(_vdo_) (((_vdo_) & BIT(16)) >> 16) +#define TBT_ADAPTER(_vdo_) FIELD_GET(BIT(16), _vdo_) #define TBT_ADAPTER_LEGACY 0 #define TBT_ADAPTER_TBT3 1 #define TBT_INTEL_SPECIFIC_B0 BIT(26) @@ -35,17 +36,18 @@ struct typec_thunderbolt_data { #define TBT_SET_ADAPTER(a) (((a) & 1) << 16) /* TBT3 Cable Discover Mode VDO bits */ -#define TBT_CABLE_SPEED(_vdo_) (((_vdo_) & GENMASK(18, 16)) >> 16) +#define TBT_CABLE_SPEED(_vdo_) FIELD_GET(GENMASK(18, 16), _vdo_) #define TBT_CABLE_USB3_GEN1 1 #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 -#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ - (((_vdo_) & GENMASK(20, 19)) >> 19) +#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) FIELD_GET(GENMASK(20, 19), _vdo_) + #define TBT_GEN3_NON_ROUNDED 0 #define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) +#define TBT_CABLE_ACTIVE_PASSIVE BIT(25) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) #define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..6030a8235617 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,6 +65,10 @@ enum rlimit_type { UCOUNT_RLIMIT_COUNTS, }; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc; +#endif + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -102,6 +106,10 @@ struct user_namespace { struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; + +#if IS_ENABLED(CONFIG_BINFMT_MISC) + struct binfmt_misc *binfmt_misc; +#endif } __randomize_layout; struct ucounts { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac7b0c96d351..05d59f74fc88 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,52 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +/* + * Start with fault_pending_wqh and fault_wqh so they're more likely + * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. + */ +struct userfaultfd_ctx { + /* waitqueue head for the pending (i.e. not read) userfaults */ + wait_queue_head_t fault_pending_wqh; + /* waitqueue head for the userfaults */ + wait_queue_head_t fault_wqh; + /* waitqueue head for the pseudo fd to wakeup poll/read */ + wait_queue_head_t fd_wqh; + /* waitqueue head for events */ + wait_queue_head_t event_wqh; + /* a refile sequence protected by fault_pending_wqh lock */ + seqcount_spinlock_t refile_seq; + /* pseudo fd refcounting */ + refcount_t refcount; + /* userfaultfd syscall flags */ + unsigned int flags; + /* features requested from the userspace */ + unsigned int features; + /* released */ + bool released; + /* + * Prevents userfaultfd operations (fill/move/wp) from happening while + * some non-cooperative event(s) is taking place. Increments are done + * in write-mode. Whereas, userfaultfd operations, which includes + * reading mmap_changing, is done under read-mode. + */ + struct rw_semaphore map_changing_lock; + /* memory mappings are changing because of non-cooperative event */ + atomic_t mmap_changing; + /* mm with one ore more vmas attached to this userfaultfd_ctx */ + struct mm_struct *mm; +}; + extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* A combined operation mode + behavior flags. */ @@ -46,6 +92,7 @@ enum mfill_atomic_mode { MFILL_ATOMIC_COPY, MFILL_ATOMIC_ZEROPAGE, MFILL_ATOMIC_CONTINUE, + MFILL_ATOMIC_POISON, NR_MFILL_ATOMIC_MODES, }; @@ -73,22 +120,31 @@ extern int mfill_atomic_install_pte(pmd_t *dst_pmd, unsigned long dst_addr, struct page *page, bool newly_allocated, uffd_flags_t flags); -extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, +extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long src_start, unsigned long len, - atomic_t *mmap_changing, uffd_flags_t flags); -extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, + uffd_flags_t flags); +extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long len, - atomic_t *mmap_changing); -extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, - unsigned long len, atomic_t *mmap_changing, - uffd_flags_t flags); -extern int mwriteprotect_range(struct mm_struct *dst_mm, - unsigned long start, unsigned long len, - bool enable_wp, atomic_t *mmap_changing); + unsigned long len); +extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long len, uffd_flags_t flags); +extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, uffd_flags_t flags); +extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, bool enable_wp); extern long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); +/* move_pages */ +void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); +void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); +ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long src_start, unsigned long len, __u64 flags); +int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, + struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, + unsigned long dst_addr, unsigned long src_addr); + /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) @@ -157,11 +213,22 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) } static inline bool vma_can_userfault(struct vm_area_struct *vma, - unsigned long vm_flags) + unsigned long vm_flags, + bool wp_async) { + vm_flags &= __VM_UFFD_FLAGS; + if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; + + /* + * If wp async enabled, and WP is the only mode enabled, allow any + * memory type. + */ + if (wp_async && (vm_flags == VM_UFFD_WP)) + return true; + #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for @@ -171,6 +238,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif + + /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || vma_is_shmem(vma); } @@ -193,6 +262,7 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); +extern bool userfaultfd_wp_async(struct vm_area_struct *vma); #else /* CONFIG_USERFAULTFD */ @@ -203,6 +273,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } +static inline long uffd_wp_range(struct vm_area_struct *vma, + unsigned long start, unsigned long len, + bool enable_wp) +{ + return false; +} + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { @@ -293,6 +370,11 @@ static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) +{ + return false; +} + #endif /* CONFIG_USERFAULTFD */ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index db1b0eaef4eb..7977ca03ac7a 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -7,6 +7,7 @@ #include <linux/interrupt.h> #include <linux/vhost_iotlb.h> #include <linux/virtio_net.h> +#include <linux/virtio_blk.h> #include <linux/if_ether.h> /** @@ -195,6 +196,10 @@ struct vdpa_map_file { * @idx: virtqueue index * Returns int: irq number of a virtqueue, * negative number if no irq assigned. + * @get_vq_size: Get the size of a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Return u16: the size of the virtqueue * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -204,10 +209,23 @@ struct vdpa_map_file { * @vdev: vdpa device * @idx: virtqueue index * Returns u32: group id for this virtqueue + * @get_vq_desc_group: Get the group id for the descriptor table of + * a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns u32: group id for the descriptor table + * portion of this virtqueue. Could be different + * than the one from @get_vq_group, in which case + * the access to the descriptor table can be + * confined to a separate asid, isolating from + * the virtqueue's buffer address access. * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the * device + * @get_backend_features: Get parent-specific backend features (optional) + * Returns the vdpa features supported by the + * device. * @set_driver_features: Set virtio features supported by the driver * @vdev: vdpa device * @features: feature support by the driver @@ -239,6 +257,17 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @compat_reset: Reset device with compatibility quirks to + * accommodate older userspace. Only needed by + * parent driver which used to have bogus reset + * behaviour, and has to maintain such behaviour + * for compatibility with older userspace. + * Historically compliant driver only has to + * implement .reset, Historically non-compliant + * driver should implement both. + * @vdev: vdpa device + * @flags: compatibility quirks for reset + * Returns integer: success (0) or error (< 0) * @suspend: Suspend the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) @@ -314,6 +343,15 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @reset_map: Reset device memory mapping to the default + * state (optional) + * Needed for devices that are using device + * specific DMA translation and prefer mapping + * to be decoupled from the virtio life cycle, + * i.e. device .reset op does not reset mapping + * @vdev: vdpa device + * @asid: address space identifier + * Returns integer: success (0) or error (< 0) * @get_vq_dma_dev: Get the dma device for a specific * virtqueue (optional) * @vdev: vdpa device @@ -353,11 +391,14 @@ struct vdpa_config_ops { (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); + u16 (*get_vq_size)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); + u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); + u64 (*get_backend_features)(const struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); u64 (*get_driver_features)(struct vdpa_device *vdev); void (*set_config_cb)(struct vdpa_device *vdev, @@ -369,6 +410,8 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*compat_reset)(struct vdpa_device *vdev, u32 flags); +#define VDPA_RESET_F_CLEAN_MAP 1 int (*suspend)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); @@ -390,6 +433,7 @@ struct vdpa_config_ops { u64 iova, u64 size, u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, u64 iova, u64 size); + int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); @@ -481,14 +525,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) { const struct vdpa_config_ops *ops = vdev->config; int ret; down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + if (ops->compat_reset && flags) + ret = ops->compat_reset(vdev, flags); + else + ret = ops->reset(vdev); up_write(&vdev->cf_lock); return ret; } diff --git a/include/linux/verification.h b/include/linux/verification.h index f34e50ebcf60..cb2d47f28091 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -8,6 +8,7 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +#include <linux/errno.h> #include <linux/types.h> /* diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2c137ea94a3e..8b1a29820409 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <linux/workqueue.h> #include <linux/poll.h> +#include <linux/cdev.h> #include <uapi/linux/vfio.h> #include <linux/iova_bitmap.h> @@ -42,7 +43,11 @@ struct vfio_device { */ const struct vfio_migration_ops *mig_ops; const struct vfio_log_ops *log_ops; +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct vfio_group *group; + struct list_head group_next; + struct list_head iommu_entry; +#endif struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; @@ -51,16 +56,25 @@ struct vfio_device { /* Members below here are private, not for driver use */ unsigned int index; struct device device; /* device.kref covers object life circle */ +#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) + struct cdev cdev; +#endif refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; - struct list_head group_next; - struct list_head iommu_entry; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; - bool iommufd_attached; + u8 iommufd_attached:1; +#endif + u8 cdev_opened:1; +#ifdef CONFIG_DEBUG_FS + /* + * debug_root is a static property of the vfio_device + * which must be set prior to registering the vfio_device. + */ + struct dentry *debug_root; #endif }; @@ -73,7 +87,9 @@ struct vfio_device { * @bind_iommufd: Called when binding the device to an iommufd * @unbind_iommufd: Opposite of bind_iommufd * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the - * bound iommufd. Undo in unbind_iommufd. + * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not + * called. + * @detach_ioas: Opposite of attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -97,6 +113,7 @@ struct vfio_device_ops { struct iommufd_ctx *ictx, u32 *out_device_id); void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -114,15 +131,31 @@ struct vfio_device_ops { }; #if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev); +int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx); int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev); #else +static inline struct iommufd_ctx * +vfio_iommufd_device_ictx(struct vfio_device *vdev) +{ + return NULL; +} + +static inline int +vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + return VFIO_PCI_DEVID_NOT_OWNED; +} + #define vfio_iommufd_physical_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) @@ -130,6 +163,8 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_physical_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) @@ -137,8 +172,15 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #endif +static inline bool vfio_device_cdev_opened(struct vfio_device *device) +{ + return device->cdev_opened; +} + /** * struct vfio_migration_ops - VFIO bus device driver migration callbacks * @@ -239,20 +281,40 @@ void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm); +void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, + u32 req_nodes); + /* * External user API */ struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); +bool vfio_file_has_dev(struct file *file, struct vfio_device *device); +#else +static inline bool vfio_file_is_group(struct file *file) +{ + return false; +} + +static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + return false; +} +#endif +bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); -bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) @@ -294,6 +356,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -301,5 +364,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 562e8754869d..a2c8b8bba711 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -127,7 +127,35 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); +int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); +ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, + void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite); +bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, + loff_t reg_start, size_t reg_cnt, + loff_t *buf_offset, + size_t *intersect_count, + size_t *register_offset); +#define VFIO_IOWRITE_DECLATION(size) \ +int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size val, void __iomem *io); + +VFIO_IOWRITE_DECLATION(8) +VFIO_IOWRITE_DECLATION(16) +VFIO_IOWRITE_DECLATION(32) +#ifdef iowrite64 +VFIO_IOWRITE_DECLATION(64) +#endif + +#define VFIO_IOREAD_DECLATION(size) \ +int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ + bool test_mem, u##size *val, void __iomem *io); + +VFIO_IOREAD_DECLATION(8) +VFIO_IOREAD_DECLATION(16) +VFIO_IOREAD_DECLATION(32) #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index b4b9137f9792..97129a1bbb7d 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: MIT */ + /* * The VGA aribiter manages VGA space routing and VGA resource decode to * allow multiple VGA devices to be used in a system in a safe way. @@ -5,27 +7,6 @@ * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> * (C) Copyright 2007, 2009 Tiago Vignatti <vignatti@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS - * IN THE SOFTWARE. - * */ #ifndef LINUX_VGA_H @@ -96,7 +77,7 @@ static inline int vga_client_register(struct pci_dev *pdev, static inline int vga_get_interruptible(struct pci_dev *pdev, unsigned int rsrc) { - return vga_get(pdev, rsrc, 1); + return vga_get(pdev, rsrc, 1); } /** @@ -111,7 +92,7 @@ static inline int vga_get_interruptible(struct pci_dev *pdev, static inline int vga_get_uninterruptible(struct pci_dev *pdev, unsigned int rsrc) { - return vga_get(pdev, rsrc, 0); + return vga_get(pdev, rsrc, 0); } static inline void vga_client_unregister(struct pci_dev *pdev) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..26c4325aa373 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -61,6 +62,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); @@ -78,6 +81,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); +int virtqueue_set_dma_premapped(struct virtqueue *_vq); + bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); @@ -95,6 +100,16 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue *vq, + void (*recycle)(struct virtqueue *vq, void *buf)); + +struct virtio_admin_cmd { + __le16 opcode; + __le16 group_type; + __le64 group_member_id; + struct scatterlist *data_sg; + struct scatterlist *result_sg; +}; /** * struct virtio_device - representation of a device using virtio @@ -155,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); /** * struct virtio_driver - operations for a virtio I/O driver - * @driver: underlying device driver (populate name and owner). + * @driver: underlying device driver (populate name). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. * @feature_table_size: number of entries in the feature table array. @@ -184,10 +199,8 @@ struct virtio_driver { void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); void (*config_changed)(struct virtio_device *dev); -#ifdef CONFIG_PM int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); -#endif }; static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) @@ -195,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) return container_of(drv, struct virtio_driver, driver); } -int register_virtio_driver(struct virtio_driver *drv); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define register_virtio_driver(drv) \ + __register_virtio_driver(drv, THIS_MODULE) +int __register_virtio_driver(struct virtio_driver *drv, struct module *owner); void unregister_virtio_driver(struct virtio_driver *drv); /* module_virtio_driver() - Helper macro for drivers that don't do @@ -206,4 +222,19 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); + +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2b3438de2c4d..da9b271b54db 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *); * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. + * @create_avq: create admin virtqueue resource. + * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -120,6 +122,8 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); + int (*create_avq)(struct virtio_device *vdev); + void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h deleted file mode 100644 index d2e2785af602..000000000000 --- a/include/linux/virtio_console.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 - * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011 - */ -#ifndef _LINUX_VIRTIO_CONSOLE_H -#define _LINUX_VIRTIO_CONSOLE_H - -#include <uapi/linux/virtio_console.h> - -int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)); -#endif /* _LINUX_VIRTIO_CONSOLE_H */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..4dfa9b69ca8d 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,10 @@ #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> #include <uapi/linux/tcp.h> -#include <uapi/linux/udp.h> #include <uapi/linux/virtio_net.h> static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -49,6 +51,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { + unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; @@ -65,6 +68,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); + nh_min_len = sizeof(struct ipv6hdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; @@ -100,7 +104,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; - p_off = skb_transport_offset(skb) + thlen; + nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb)); + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { @@ -140,7 +145,7 @@ retry: skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { - p_off = thlen; + p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } @@ -151,9 +156,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h new file mode 100644 index 000000000000..f4a100a0fe2e --- /dev/null +++ b/include/linux/virtio_pci_admin.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_ADMIN_H +#define _LINUX_VIRTIO_PCI_ADMIN_H + +#include <linux/types.h> +#include <linux/pci.h> + +#ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY +bool virtio_pci_admin_has_legacy_io(struct pci_dev *pdev); +int virtio_pci_admin_legacy_common_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_common_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_write(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_device_io_read(struct pci_dev *pdev, u8 offset, + u8 size, u8 *buf); +int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, + u8 req_bar_flags, u8 *bar, + u64 *bar_offset); +#endif + +#endif /* _LINUX_VIRTIO_PCI_ADMIN_H */ diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 067ac1d789bc..c0b1b1ca1163 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,44 +5,48 @@ #include <linux/pci.h> #include <linux/virtio_pci.h> -struct virtio_pci_modern_common_cfg { - struct virtio_pci_common_cfg cfg; - - __le16 queue_notify_data; /* read-write */ - __le16 queue_reset; /* read-write */ -}; - +/** + * struct virtio_pci_modern_device - info for modern PCI virtio + * @pci_dev: Ptr to the PCI device struct + * @common: Position of the common capability in the PCI config + * @device: Device-specific data (non-legacy mode) + * @notify_base: Base of vq notifications (non-legacy mode) + * @notify_pa: Physical base of vq notifications + * @isr: Where to read and clear interrupt + * @notify_len: So we can sanity-check accesses + * @device_len: So we can sanity-check accesses + * @notify_map_cap: Capability for when we need to map notifications per-vq + * @notify_offset_multiplier: Multiply queue_notify_off by this value + * (non-legacy mode). + * @modern_bars: Bitmask of BARs + * @id: Device and vendor id + * @device_id_check: Callback defined before vp_modern_probe() to be used to + * verify the PCI device is a vendor's expected device rather + * than the standard virtio PCI device + * Returns the found device id or ERRNO + * @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64), + * for vendor devices with DMA space address limitations + */ struct virtio_pci_modern_device { struct pci_dev *pci_dev; struct virtio_pci_common_cfg __iomem *common; - /* Device-specific data (non-legacy mode) */ void __iomem *device; - /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; - /* Physical base of vq notifications */ resource_size_t notify_pa; - /* Where to read and clear interrupt */ u8 __iomem *isr; - /* So we can sanity-check accesses. */ size_t notify_len; size_t device_len; + size_t common_len; - /* Capability for when we need to map notifications per-vq. */ int notify_map_cap; - /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; - int modern_bars; - struct virtio_device_id id; - /* optional check for vendor virtio device, returns dev_id or -ERRNO */ int (*device_id_check)(struct pci_dev *pdev); - - /* optional mask for devices with limited DMA space */ u64 dma_mask; }; @@ -121,4 +125,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); +u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev); +u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev); #endif diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..c82089dee0c8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) @@ -159,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t @@ -246,4 +256,5 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor); +int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val); #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org> - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include <linux/device.h> -#include <linux/types.h> - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 8abfa1240040..747943bc8cc2 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, - PGDEMOTE_KSWAPD, - PGDEMOTE_DIRECT, - PGDEMOTE_KHUGEPAGED, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, @@ -145,6 +142,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_ZSWAP ZSWPIN, ZSWPOUT, + ZSWPWB, #endif #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c720be70c8dd..98ea90e90439 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,6 +35,7 @@ struct iov_iter; /* in uio.h */ #else #define VM_DEFER_KMEMLEAK 0 #endif +#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ @@ -232,6 +233,10 @@ static inline bool is_vm_area_hugepages(const void *addr) } #ifdef CONFIG_MMU +int vm_area_map_pages(struct vm_struct *area, unsigned long start, + unsigned long end, struct page **pages); +void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, + unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { @@ -253,7 +258,6 @@ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. */ -extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h new file mode 100644 index 000000000000..e1dec1a6a749 --- /dev/null +++ b/include/linux/vmcore_info.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VMCORE_INFO_H +#define LINUX_VMCORE_INFO_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +/* + * The per-cpu notes area is a list of notes terminated by a "NULL" + * note header. For kdump, the code in vmcore.c runs in the context + * of the second kernel to combine them into one note. + */ +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES PAGE_SIZE +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +/* Per cpu memory for storing cpu states in case of system crash. */ +extern note_buf_t __percpu *crash_notes; + +void crash_update_vmcoreinfo_safecopy(void *ptr); +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID() \ + ({ \ + static_assert(sizeof(vmlinux_build_id) == 20); \ + vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ + }) + +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_TYPE_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern unsigned char *vmcoreinfo_data; +extern size_t vmcoreinfo_size; +extern u32 *vmcoreinfo_note; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); +#endif /* LINUX_VMCORE_INFO_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index fed855bae6d8..343906a98d6e 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -556,19 +556,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -void __mod_lruvec_page_state(struct page *page, +void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); -static inline void mod_lruvec_page_state(struct page *page, +static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); - __mod_lruvec_page_state(page, idx, val); + __lruvec_stat_mod_folio(folio, idx, val); local_irq_restore(flags); } +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + lruvec_stat_mod_folio(page_folio(page), idx, val); +} + #else static inline void __mod_lruvec_state(struct lruvec *lruvec, @@ -583,37 +589,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec, mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(page_pgdat(page), idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, +static inline void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { - mod_node_page_state(page_pgdat(page), idx, val); + __mod_node_page_state(folio_pgdat(folio), idx, val); } -#endif /* CONFIG_MEMCG */ - -static inline void __inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) { - __mod_lruvec_page_state(page, idx, 1); + mod_node_page_state(folio_pgdat(folio), idx, val); } -static inline void __dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) { - __mod_lruvec_page_state(page, idx, -1); + mod_node_page_state(page_pgdat(page), idx, val); } -static inline void __lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - __mod_lruvec_page_state(&folio->page, idx, val); -} +#endif /* CONFIG_MEMCG */ static inline void __lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) @@ -627,24 +621,6 @@ static inline void __lruvec_stat_sub_folio(struct folio *folio, __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -static inline void inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, 1); -} - -static inline void dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, -1); -} - -static inline void lruvec_stat_mod_folio(struct folio *folio, - enum node_stat_item idx, int val) -{ - mod_lruvec_page_state(&folio->page, idx, val); -} - static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index c1f5aebef170..d008c3d0a9bb 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -25,7 +25,8 @@ extern int fg_console, last_console, want_console; int vc_allocate(unsigned int console); int vc_cons_allocated(unsigned int console); -int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); +int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines, + bool from_user); struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); @@ -42,6 +43,12 @@ void redraw_screen(struct vc_data *vc, int is_switch); #define update_screen(x) redraw_screen(x, 0) #define switch_screen(x) redraw_screen(x, 1) +static inline int vc_resize(struct vc_data *vc, unsigned int cols, + unsigned int lines) +{ + return __vc_resize(vc, cols, lines, false); +} + struct tty_struct; int tioclinux(struct tty_struct *tty, unsigned long arg); @@ -168,7 +175,4 @@ void vt_set_led_state(unsigned int console, int leds); void vt_kbd_con_start(unsigned int console); void vt_kbd_con_stop(unsigned int console); -void vc_scrolldelta_helper(struct vc_data *c, int lines, - unsigned int rolled_over, void *_base, unsigned int size); - #endif /* _VT_KERN_H */ diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h deleted file mode 100644 index 3495fd0dc790..000000000000 --- a/include/linux/w1-gpio.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * w1-gpio interface to platform code - * - * Copyright (C) 2007 Ville Syrjala <syrjala@sci.fi> - */ -#ifndef _LINUX_W1_GPIO_H -#define _LINUX_W1_GPIO_H - -struct gpio_desc; - -/** - * struct w1_gpio_platform_data - Platform-dependent data for w1-gpio - */ -struct w1_gpio_platform_data { - struct gpio_desc *gpiod; - struct gpio_desc *pullup_gpiod; - void (*enable_external_pullup)(int enable); - unsigned int pullup_duration; -}; - -#endif /* _LINUX_W1_GPIO_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index a0307b516b09..8aa3372f21a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -9,7 +9,6 @@ #include <linux/spinlock.h> #include <asm/current.h> -#include <uapi/linux/wait.h> typedef struct wait_queue_entry wait_queue_entry_t; @@ -19,10 +18,9 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 -#define WQ_FLAG_BOOKMARK 0x04 -#define WQ_FLAG_CUSTOM 0x08 -#define WQ_FLAG_DONE 0x10 -#define WQ_FLAG_PRIORITY 0x20 +#define WQ_FLAG_CUSTOM 0x04 +#define WQ_FLAG_DONE 0x08 +#define WQ_FLAG_PRIORITY 0x10 /* * A single wait-queue entry structure: @@ -210,9 +208,8 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq } int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); -void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, - unsigned int mode, void *key, wait_queue_entry_t *bookmark); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); @@ -237,6 +234,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m)) #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, poll_to_key(m)) +#define wake_up_poll_on_current_cpu(x, m) \ + __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m)) #define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m)) #define wake_up_interruptible_poll(x, m) \ diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 45cd42f55d49..429c7b6afead 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -32,7 +32,7 @@ struct watch_filter { DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ - struct watch_type_filter filters[]; + struct watch_type_filter filters[] __counted_by(nr_filters); }; struct watch_queue { diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h index 4ca2842d2842..6a5bb052fcc2 100644 --- a/include/linux/win_minmax.h +++ b/include/linux/win_minmax.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** - * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. +/* + * win_minmax.h: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 763bd382cf2d..63cca3b58d6d 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -11,7 +11,6 @@ #include <linux/device.h> #include <linux/acpi.h> #include <linux/mod_devicetable.h> -#include <uapi/linux/wmi.h> /** * struct wmi_device - WMI device structure @@ -22,11 +21,17 @@ */ struct wmi_device { struct device dev; - - /* private: used by the WMI driver core */ bool setable; }; +/** + * to_wmi_device() - Helper macro to cast a device to a wmi_device + * @device: device struct + * + * Cast a struct device to a struct wmi_device. + */ +#define to_wmi_device(device) container_of(device, struct wmi_device, dev) + extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, const struct acpi_buffer *in, @@ -35,34 +40,31 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); -u8 wmidev_instance_count(struct wmi_device *wdev); +acpi_status wmidev_block_set(struct wmi_device *wdev, u8 instance, const struct acpi_buffer *in); -extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); +u8 wmidev_instance_count(struct wmi_device *wdev); /** * struct wmi_driver - WMI driver structure * @driver: Driver model structure * @id_table: List of WMI GUIDs supported by this driver - * @no_notify_data: WMI events provide no event data + * @no_notify_data: Driver supports WMI events which provide no event data + * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding * @notify: Callback for receiving WMI events - * @filter_callback: Callback for filtering device IOCTLs * * This represents WMI drivers which handle WMI devices. - * @filter_callback is only necessary for drivers which - * want to set up a WMI IOCTL interface. */ struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; bool no_notify_data; + bool no_singleton; int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); - long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd, - struct wmi_ioctl_buffer *arg); }; extern int __must_check __wmi_driver_register(struct wmi_driver *driver, diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h new file mode 100644 index 000000000000..f6f8f83b15b0 --- /dev/null +++ b/include/linux/wordpart.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_WORDPART_H +#define _LINUX_WORDPART_H + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) + +/** + * upper_16_bits - return bits 16-31 of a number + * @n: the number we're accessing + */ +#define upper_16_bits(n) ((u16)((n) >> 16)) + +/** + * lower_16_bits - return bits 0-15 of a number + * @n: the number we're accessing + */ +#define lower_16_bits(n) ((u16)((n) & 0xffff)) + +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +#endif // _LINUX_WORDPART_H diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 683efe29fa69..158784dd189a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -14,12 +14,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> #include <linux/rcupdate.h> - -struct workqueue_struct; - -struct work_struct; -typedef void (*work_func_t)(struct work_struct *work); -void delayed_work_timer_fn(struct timer_list *t); +#include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into @@ -27,20 +22,54 @@ void delayed_work_timer_fn(struct timer_list *t); */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) -enum { +enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */ - WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ - WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ + WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ + WORK_STRUCT_PWQ_BIT, /* data points to pwq */ + WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ -#else - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif + WORK_STRUCT_FLAG_BITS, + /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, + /* + * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ + * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 + * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. + * + * MSB + * [ pwq pointer ] [ flush color ] [ STRUCT flags ] + * 4 bits 4 or 5 bits + */ + WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, + + /* + * data contains off-queue information when !WORK_STRUCT_PWQ. + * + * MSB + * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] + * 1 bit 4 or 5 bits + */ + WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_FLAG_END, + WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, + + /* + * When a work item is off queue, the high bits encode off-queue flags + * and the last pool it was on. Cap pool ID to 31 bits and use the + * highest number to indicate that no pool is associated. + */ + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, +}; + +enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, @@ -50,35 +79,14 @@ enum { #else WORK_STRUCT_STATIC = 0, #endif +}; +enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - /* - * Reserve 8 bits off of pwq pointer w/ debugobjects turned off. - * This makes pwqs aligned to 256 bytes and allows 16 workqueue - * flush colors. - */ - WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + - WORK_STRUCT_COLOR_BITS, - - /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - - __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - - /* - * When a work item is off queue, its high bits point to the last - * pool it was on. Cap at 31 bits and use the highest number to - * indicate that no pool is associated. - */ - WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, - WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, - WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, @@ -88,21 +96,10 @@ enum { }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) - -#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) -#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) - -struct work_struct { - atomic_long_t data; - struct list_head entry; - work_func_t func; -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; +#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ @@ -125,6 +122,17 @@ struct rcu_work { struct workqueue_struct *wq; }; +enum wq_affn_scope { + WQ_AFFN_DFL, /* use system default */ + WQ_AFFN_CPU, /* one pod per CPU */ + WQ_AFFN_SMT, /* one pod poer SMT */ + WQ_AFFN_CACHE, /* one pod per LLC */ + WQ_AFFN_NUMA, /* one pod per NUMA node */ + WQ_AFFN_SYSTEM, /* one pod across the whole system */ + + WQ_AFFN_NR_TYPES, +}; + /** * struct workqueue_attrs - A struct for workqueue attributes. * @@ -138,17 +146,58 @@ struct workqueue_attrs { /** * @cpumask: allowed CPUs + * + * Work items in this workqueue are affine to these CPUs and not allowed + * to execute on other CPUs. A pool serving a workqueue must have the + * same @cpumask. */ cpumask_var_t cpumask; /** - * @no_numa: disable NUMA affinity + * @__pod_cpumask: internal attribute used to create per-pod pools + * + * Internal use only. + * + * Per-pod unbound worker pools are used to improve locality. Always a + * subset of ->cpumask. A workqueue can be associated with multiple + * worker pools with disjoint @__pod_cpumask's. Whether the enforcement + * of a pool's @__pod_cpumask is strict depends on @affn_strict. + */ + cpumask_var_t __pod_cpumask; + + /** + * @affn_strict: affinity scope is strict + * + * If clear, workqueue will make a best-effort attempt at starting the + * worker inside @__pod_cpumask but the scheduler is free to migrate it + * outside. + * + * If set, workers are only allowed to run inside @__pod_cpumask. + */ + bool affn_strict; + + /* + * Below fields aren't properties of a worker_pool. They only modify how + * :c:func:`apply_workqueue_attrs` select pools and thus don't + * participate in pool hash calculations or equality comparisons. + */ + + /** + * @affn_scope: unbound CPU affinity scope * - * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It - * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus - * doesn't participate in pool hash calculations or equality comparisons. + * CPU pods are used to improve execution locality of unbound work + * items. There are multiple pod types, one for each wq_affn_scope, and + * every CPU in the system belongs to one pod in every pod type. CPUs + * that belong to the same pod share the worker pool. For example, + * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker + * pool for each NUMA node. + */ + enum wq_affn_scope affn_scope; + + /** + * @ordered: work items must be executed one by one in queueing order */ - bool no_numa; + bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) @@ -222,18 +271,16 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ - static struct lock_class_key __key; \ - \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ @@ -242,12 +289,22 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } } while (0) #endif +#define __INIT_WORK(_work, _func, _onstack) \ + do { \ + static __maybe_unused struct lock_class_key __key; \ + \ + __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ + } while (0) + #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) +#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ + __INIT_WORK_KEY((_work), (_func), 1, _key) + #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -301,7 +358,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ -enum { +enum wq_flags { + WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ @@ -340,16 +398,23 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ - __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ + /* BH wq only allows the following flags */ + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, +}; + +enum wq_consts { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ - WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ + WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, -}; -/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ -#define WQ_UNBOUND_MAX_ACTIVE \ - max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU) + /* + * Per-node default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE = 8, +}; /* * System-wide workqueues which are always present. @@ -378,6 +443,9 @@ enum { * they are same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. + * + * system_bh[_highpri]_wq are convenience interface to softirq. BH work items + * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; @@ -386,6 +454,11 @@ extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern struct workqueue_struct *system_bh_wq; +extern struct workqueue_struct *system_bh_highpri_wq; + +void workqueue_softirq_action(bool highpri); +void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue @@ -394,8 +467,30 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * @max_active: max in-flight work items, 0 for default * remaining args: args for @fmt * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work items + * for the whole system. e.g. @max_active of 16 indicates that that there can be + * at most 16 work items executing for the workqueue in the whole system. + * + * As sharing the same active counter for an unbound workqueue across multiple + * NUMA nodes can be expensive, @max_active is distributed to each NUMA node + * according to the proportion of the number of online CPUs and enforced + * independently. + * + * Depending on online CPU distribution, a node may end up with per-node + * max_active which is significantly lower than @max_active, which can lead to + * deadlocks if the per-node concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. + * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every node is guaranteed to be equal to or greater than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means + * that the sum of per-node max_active's may be larger than @max_active. + * + * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: @@ -418,8 +513,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ - __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) @@ -429,13 +523,16 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) +#define from_work(var, callback_work, work_fieldname) \ + container_of(callback_work, typeof(*var), work_fieldname) + extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); +extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); @@ -466,6 +563,8 @@ extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern void workqueue_set_min_active(struct workqueue_struct *wq, + int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); @@ -569,6 +668,7 @@ static inline bool schedule_work(struct work_struct *work) /* * Detect attempt to flush system-wide workqueues at compile time when possible. + * Warn attempt to flush system-wide workqueues at runtime. * * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp * for reasons and steps for converting system-wide workqueues into local workqueues. @@ -576,52 +676,13 @@ static inline bool schedule_work(struct work_struct *work) extern void __warn_flushing_systemwide_wq(void) __compiletime_warning("Please avoid flushing system-wide workqueues."); -/** - * flush_scheduled_work - ensure that any scheduled work has run to completion. - * - * Forces execution of the kernel-global workqueue and blocks until its - * completion. - * - * It's very easy to get into trouble if you don't take great care. - * Either of the following situations will lead to deadlock: - * - * One of the work items currently on the workqueue needs to acquire - * a lock held by your code or its caller. - * - * Your code is running in the context of a work routine. - * - * They will be detected by lockdep when they occur, but the first might not - * occur very often. It depends on what work items are on the workqueue and - * what locks they need, which you have no control over. - * - * In most situations flushing the entire workqueue is overkill; you merely - * need to know that a particular work item isn't queued and isn't running. - * In such cases you should use cancel_delayed_work_sync() or - * cancel_work_sync() instead. - * - * Please stop calling this function! A conversion to stop flushing system-wide - * workqueues is in progress. This function will be removed after all in-tree - * users stopped calling this function. - */ -/* - * The background of commit 771c035372a036f8 ("deprecate the - * '__deprecated' attribute warnings entirely and for good") is that, - * since Linus builds all modules between every single pull he does, - * the standard kernel build needs to be _clean_ in order to be able to - * notice when new problems happen. Therefore, don't emit warning while - * there are in-tree users. - */ +/* Please stop using this function, for this function will be removed in near future. */ #define flush_scheduled_work() \ ({ \ - if (0) \ - __warn_flushing_systemwide_wq(); \ + __warn_flushing_systemwide_wq(); \ __flush_workqueue(system_wq); \ }) -/* - * Although there is no longer in-tree caller, for now just emit warning - * in order to give out-of-tree callers time to update. - */ #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ @@ -683,8 +744,32 @@ static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) return fn(arg); } #else -long work_on_cpu(int cpu, long (*fn)(void *), void *arg); -long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_key(_cpu, _fn, _arg, &__key); \ +}) + +long work_on_cpu_safe_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); + +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu_safe(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ +}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER @@ -714,5 +799,6 @@ int workqueue_offline_cpu(unsigned int cpu); void __init workqueue_init_early(void); void __init workqueue_init(void); +void __init workqueue_init_topology(void); #endif diff --git a/include/linux/workqueue_types.h b/include/linux/workqueue_types.h new file mode 100644 index 000000000000..4c38824f3ab4 --- /dev/null +++ b/include/linux/workqueue_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_WORKQUEUE_TYPES_H +#define _LINUX_WORKQUEUE_TYPES_H + +#include <linux/atomic.h> +#include <linux/lockdep_types.h> +#include <linux/timer_types.h> +#include <linux/types.h> + +struct workqueue_struct; + +struct work_struct; +typedef void (*work_func_t)(struct work_struct *work); +void delayed_work_timer_fn(struct timer_list *t); + +struct work_struct { + atomic_long_t data; + struct list_head entry; + work_func_t func; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif +}; + +#endif /* _LINUX_WORKQUEUE_TYPES_H */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fba937999fbf..9845cb62e40b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,6 +11,7 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> +#include <linux/pagevec.h> struct bio; @@ -40,6 +41,7 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { + /* public fields that can be set and/or consumed by the caller: */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ @@ -60,7 +62,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ + unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ /* * When writeback IOs are bounced through async layers, only the @@ -77,6 +79,11 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* internal fields used by the ->writepages implementation: */ + struct folio_batch fbatch; + pgoff_t index; + int saved_err; + #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ @@ -193,7 +200,6 @@ void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { - might_sleep(); wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } @@ -361,11 +367,12 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool wb_over_bg_thresh(struct bdi_writeback *wb); +struct folio *writeback_iter(struct address_space *mapping, + struct writeback_control *wbc, struct folio *folio, int *error); + typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); -void tag_pages_for_writeback(struct address_space *mapping, - pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); @@ -375,11 +382,6 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); -void folio_account_redirty(struct folio *folio); -static inline void account_page_redirty(struct page *page) -{ - folio_account_redirty(page_folio(page)); -} bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); bool redirty_page_for_writepage(struct writeback_control *, struct page *); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 01fa15506286..170fdee6339c 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -16,6 +16,7 @@ * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems + * @WWAN_PORT_FASTBOOT: Fastboot protocol control * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -28,6 +29,7 @@ enum wwan_port_type { WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, + WWAN_PORT_FASTBOOT, /* Add new port types above this line */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 741703b45f61..cb571dfcf4b1 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d591ef59aa98..d20051865800 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -114,13 +114,15 @@ struct simple_xattr { }; void simple_xattrs_init(struct simple_xattrs *xattrs); -void simple_xattrs_free(struct simple_xattrs *xattrs); +void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); +size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); +void simple_xattr_free(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); -int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, - const void *value, size_t size, int flags, - ssize_t *removed_size); +struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_add(struct simple_xattrs *xattrs, diff --git a/include/linux/zswap.h b/include/linux/zswap.h new file mode 100644 index 000000000000..341aea490070 --- /dev/null +++ b/include/linux/zswap.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ZSWAP_H +#define _LINUX_ZSWAP_H + +#include <linux/types.h> +#include <linux/mm_types.h> + +struct lruvec; + +extern u64 zswap_pool_total_size; +extern atomic_t zswap_stored_pages; + +#ifdef CONFIG_ZSWAP + +struct zswap_lruvec_state { + /* + * Number of pages in zswap that should be protected from the shrinker. + * This number is an estimate of the following counts: + * + * a) Recent page faults. + * b) Recent insertion to the zswap LRU. This includes new zswap stores, + * as well as recent zswap LRU rotations. + * + * These pages are likely to be warm, and might incur IO if the are written + * to swap. + */ + atomic_long_t nr_zswap_protected; +}; + +bool zswap_store(struct folio *folio); +bool zswap_load(struct folio *folio); +void zswap_invalidate(swp_entry_t swp); +int zswap_swapon(int type, unsigned long nr_pages); +void zswap_swapoff(int type); +void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); +void zswap_lruvec_state_init(struct lruvec *lruvec); +void zswap_folio_swapin(struct folio *folio); +bool is_zswap_enabled(void); +#else + +struct zswap_lruvec_state {}; + +static inline bool zswap_store(struct folio *folio) +{ + return false; +} + +static inline bool zswap_load(struct folio *folio) +{ + return false; +} + +static inline void zswap_invalidate(swp_entry_t swp) {} +static inline int zswap_swapon(int type, unsigned long nr_pages) +{ + return 0; +} +static inline void zswap_swapoff(int type) {} +static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} +static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {} +static inline void zswap_folio_swapin(struct folio *folio) {} + +static inline bool is_zswap_enabled(void) +{ + return false; +} + +#endif + +#endif /* _LINUX_ZSWAP_H */ |
